Credit risk—the risk of counterparty default—is fundamental to finance. After building credit risk systems for $10B+ corporate loan portfolios, I've learned that combining structural models (Merton), reduced-form models (CreditMetrics), and machine learning gives the most robust default predictions. This article covers production implementations.
Key metrics:
Credit risk differs from market risk: Fat tails, low probability but high impact events.
Views equity as call option on firm assets. Firm defaults when assets < liabilities.
1import numpy as np
2from scipy.stats import norm
3from scipy.optimize import fsolve, minimize
4import pandas as pd
5
6class MertonModel:
7 """
8 Merton structural credit risk model.
9
10 Key insight: Equity = Call option on firm assets with strike = debt value
11 Default occurs when V_T < D (assets < debt)
12 """
13
14 def __init__(self, risk_free_rate: float = 0.05):
15 """
16 Args:
17 risk_free_rate: Risk-free interest rate
18 """
19 self.r = risk_free_rate
20
21 def equity_value(self, V: float, D: float, sigma_v: float, T: float) -> float:
22 """
23 Calculate equity value using Black-Scholes.
24
25 E = V·N(d1) - D·e^(-rT)·N(d2)
26
27 Args:
28 V: Firm asset value
29 D: Debt face value
30 sigma_v: Asset volatility
31 T: Time to maturity
32
33 Returns:
34 Equity value
35 """
36 if T <= 0:
37 return max(V - D, 0)
38
39 d1 = (np.log(V/D) + (self.r + 0.5*sigma_v**2)*T) / (sigma_v * np.sqrt(T))
40 d2 = d1 - sigma_v * np.sqrt(T)
41
42 E = V * norm.cdf(d1) - D * np.exp(-self.r * T) * norm.cdf(d2)
43
44 return E
45
46 def equity_volatility(self, V: float, E: float, D: float,
47 sigma_v: float, T: float) -> float:
48 """
49 Calculate equity volatility from asset volatility.
50
51 σ_E = (V/E) · N(d1) · σ_V
52
53 This comes from Ito's lemma applied to equity as function of assets.
54 """
55 if T <= 0 or E <= 0:
56 return 0
57
58 d1 = (np.log(V/D) + (self.r + 0.5*sigma_v**2)*T) / (sigma_v * np.sqrt(T))
59
60 sigma_e = (V / E) * norm.cdf(d1) * sigma_v
61
62 return sigma_e
63
64 def calibrate(self, E: float, sigma_e: float, D: float, T: float) -> dict:
65 """
66 Calibrate model to observed equity value and volatility.
67
68 Solve system:
69 E = V·N(d1) - D·e^(-rT)·N(d2)
70 σ_E = (V/E)·N(d1)·σ_V
71
72 Args:
73 E: Market equity value
74 sigma_e: Equity volatility (from historical data)
75 D: Debt face value
76 T: Debt maturity
77
78 Returns:
79 Dict with V (asset value) and sigma_v (asset volatility)
80 """
81
82 def equations(x):
83 V, sigma_v = x
84
85 # Equation 1: Equity value
86 E_model = self.equity_value(V, D, sigma_v, T)
87 eq1 = E_model - E
88
89 # Equation 2: Equity volatility
90 sigma_e_model = self.equity_volatility(V, E, D, sigma_v, T)
91 eq2 = sigma_e_model - sigma_e
92
93 return [eq1, eq2]
94
95 # Initial guess: V = E + D, sigma_v = sigma_e * E / (E + D)
96 V0 = E + D
97 sigma_v0 = sigma_e * E / (E + D)
98
99 # Solve system
100 result = fsolve(equations, [V0, sigma_v0])
101 V_calibrated, sigma_v_calibrated = result
102
103 # Calculate default probability
104 dd = self.distance_to_default(V_calibrated, D, sigma_v_calibrated, T)
105 pd = self.default_probability(V_calibrated, D, sigma_v_calibrated, T)
106
107 return {
108 'asset_value': V_calibrated,
109 'asset_volatility': sigma_v_calibrated,
110 'distance_to_default': dd,
111 'default_probability': pd,
112 'leverage': D / V_calibrated
113 }
114
115 def distance_to_default(self, V: float, D: float,
116 sigma_v: float, T: float) -> float:
117 """
118 Calculate distance to default (in standard deviations).
119
120 DD = (ln(V/D) + (μ - 0.5σ²)T) / (σ√T)
121
122 Approximating μ ≈ r (risk-neutral drift)
123 """
124 if T <= 0 or sigma_v <= 0:
125 return np.inf if V > D else -np.inf
126
127 numerator = np.log(V/D) + (self.r - 0.5*sigma_v**2) * T
128 denominator = sigma_v * np.sqrt(T)
129
130 return numerator / denominator
131
132 def default_probability(self, V: float, D: float,
133 sigma_v: float, T: float) -> float:
134 """
135 Calculate risk-neutral default probability.
136
137 PD = N(-DD) where DD is distance to default
138 """
139 dd = self.distance_to_default(V, D, sigma_v, T)
140
141 return norm.cdf(-dd)
142
143 def credit_spread(self, V: float, D: float, sigma_v: float,
144 T: float, recovery_rate: float = 0.4) -> float:
145 """
146 Calculate credit spread over risk-free rate.
147
148 Risky debt value: B = D·e^(-rT)·[N(d2) + (1/d)·N(-d1)]
149 where d = D/V (quasi-debt ratio)
150
151 Spread: s = -(1/T)·ln(B/B_rf) where B_rf = D·e^(-rT)
152 """
153 if T <= 0:
154 return 0
155
156 pd = self.default_probability(V, D, sigma_v, T)
157
158 # Expected loss = PD × LGD
159 lgd = 1 - recovery_rate
160 expected_loss = pd * lgd
161
162 # Approximate spread (simplified)
163 spread = -np.log(1 - expected_loss) / T
164
165 return spread
166
167# Example usage
168if __name__ == "__main__":
169 merton = MertonModel(risk_free_rate=0.05)
170
171 # Company data
172 market_cap = 1000 # $1B equity value
173 equity_vol = 0.30 # 30% equity volatility
174 debt = 800 # $800M debt
175 maturity = 1.0 # 1 year
176
177 # Calibrate model
178 result = merton.calibrate(
179 E=market_cap,
180 sigma_e=equity_vol,
181 D=debt,
182 T=maturity
183 )
184
185 print("Merton Model Calibration:")
186 print(f" Asset Value: ${result['asset_value']:.0f}M")
187 print(f" Asset Volatility: {result['asset_volatility']*100:.2f}%")
188 print(f" Distance to Default: {result['distance_to_default']:.2f} σ")
189 print(f" Default Probability: {result['default_probability']*100:.2f}%")
190 print(f" Leverage: {result['leverage']*100:.1f}%")
191
192 # Calculate credit spread
193 spread = merton.credit_spread(
194 V=result['asset_value'],
195 D=debt,
196 sigma_v=result['asset_volatility'],
197 T=maturity,
198 recovery_rate=0.4
199 )
200 print(f" Credit Spread: {spread*10000:.0f} bps")
201Intensity-based models: default occurs as Poisson process with hazard rate λ(t).
1import numpy as np
2from scipy.optimize import minimize
3from sklearn.linear_model import LogisticRegression
4
5class CoxIntensityModel:
6 """
7 Reduced-form credit model with stochastic intensity.
8
9 Default intensity: λ(t) = λ₀ · exp(β'X(t))
10 where X(t) are macroeconomic/firm-specific factors
11 """
12
13 def __init__(self):
14 self.beta = None
15 self.lambda0 = None
16 self.fitted = False
17
18 def fit(self, X: np.ndarray, defaults: np.ndarray,
19 exposure_times: np.ndarray):
20 """
21 Fit model using maximum likelihood.
22
23 Args:
24 X: Feature matrix (n_samples, n_features)
25 defaults: Binary default indicator
26 exposure_times: Time at risk for each observation
27 """
28
29 def log_likelihood(params):
30 lambda0, *beta = params
31
32 # Intensity
33 intensity = lambda0 * np.exp(X @ beta)
34
35 # Log-likelihood
36 # L = Π [λ^d · exp(-λ·t)] where d=1 if default, else 0
37 ll = np.sum(
38 defaults * np.log(intensity) -
39 intensity * exposure_times
40 )
41
42 return -ll # Negative for minimization
43
44 # Initial guess
45 x0 = np.zeros(X.shape[1] + 1)
46 x0[0] = 0.01 # lambda0
47
48 # Optimize
49 result = minimize(
50 log_likelihood,
51 x0,
52 method='L-BFGS-B',
53 bounds=[(1e-6, None)] + [(-10, 10)] * X.shape[1]
54 )
55
56 self.lambda0 = result.x[0]
57 self.beta = result.x[1:]
58 self.fitted = True
59
60 return {
61 'lambda0': self.lambda0,
62 'beta': self.beta,
63 'log_likelihood': -result.fun
64 }
65
66 def predict_intensity(self, X: np.ndarray) -> np.ndarray:
67 """Predict default intensity for new observations."""
68 if not self.fitted:
69 raise ValueError("Model not fitted")
70
71 return self.lambda0 * np.exp(X @ self.beta)
72
73 def predict_default_prob(self, X: np.ndarray, T: float) -> np.ndarray:
74 """
75 Predict default probability over horizon T.
76
77 PD(T) = 1 - exp(-∫₀ᵀ λ(s)ds)
78
79 Assuming constant intensity over [0,T]:
80 PD(T) = 1 - exp(-λ·T)
81 """
82 intensity = self.predict_intensity(X)
83
84 return 1 - np.exp(-intensity * T)
85
86 def credit_spread(self, X: np.ndarray, T: float,
87 recovery_rate: float = 0.4) -> np.ndarray:
88 """Calculate credit spread given intensity."""
89 pd = self.predict_default_prob(X, T)
90 lgd = 1 - recovery_rate
91
92 # Spread ≈ -ln(1 - PD·LGD) / T
93 spread = -np.log(1 - pd * lgd) / T
94
95 return spread
96
97# Example usage
98if __name__ == "__main__":
99 # Generate synthetic data
100 np.random.seed(42)
101 n_samples = 1000
102
103 # Features: leverage, profitability, market conditions
104 X = np.random.randn(n_samples, 3)
105 X[:, 0] *= 0.3 # Leverage
106 X[:, 1] *= 0.2 # ROA
107 X[:, 2] *= 0.15 # Market volatility
108
109 # True intensity
110 true_lambda = 0.02 * np.exp(X @ [2.0, -1.5, 0.8])
111
112 # Simulate defaults
113 exposure_times = np.ones(n_samples) # 1 year
114 default_prob = 1 - np.exp(-true_lambda * exposure_times)
115 defaults = np.random.binomial(1, default_prob)
116
117 # Fit model
118 model = CoxIntensityModel()
119 params = model.fit(X, defaults, exposure_times)
120
121 print("Cox Model Parameters:")
122 print(f" λ₀ = {params['lambda0']:.4f}")
123 print(f" β = {params['beta']}")
124
125 # Predict for new observation
126 X_new = np.array([[0.5, 0.1, 0.2]]) # High leverage, low ROA
127 pd_1y = model.predict_default_prob(X_new, T=1.0)
128 spread = model.credit_spread(X_new, T=1.0)
129
130 print(f"\nPrediction:")
131 print(f" 1-year PD: {pd_1y[0]*100:.2f}%")
132 print(f" Credit Spread: {spread[0]*10000:.0f} bps")
1331import pandas as pd
2from sklearn.model_selection import train_test_split, cross_val_score
3from sklearn.ensemble import GradientBoostingClassifier
4from sklearn.preprocessing import StandardScaler
5from sklearn.metrics import roc_auc_score, precision_recall_curve
6import shap
7
8class MLCreditModel:
9 """
10 Machine learning credit risk model using gradient boosting.
11 """
12
13 def __init__(self, n_estimators=100, max_depth=5, learning_rate=0.1):
14 self.model = GradientBoostingClassifier(
15 n_estimators=n_estimators,
16 max_depth=max_depth,
17 learning_rate=learning_rate,
18 subsample=0.8,
19 random_state=42
20 )
21 self.scaler = StandardScaler()
22 self.feature_names = None
23 self.fitted = False
24
25 def fit(self, X: pd.DataFrame, y: np.ndarray):
26 """
27 Train model.
28
29 Args:
30 X: Feature DataFrame
31 y: Binary default indicator
32 """
33 self.feature_names = X.columns.tolist()
34
35 # Scale features
36 X_scaled = self.scaler.fit_transform(X)
37
38 # Train model
39 self.model.fit(X_scaled, y)
40 self.fitted = True
41
42 # Calculate feature importance
43 self.feature_importance = pd.DataFrame({
44 'feature': self.feature_names,
45 'importance': self.model.feature_importances_
46 }).sort_values('importance', ascending=False)
47
48 return self
49
50 def predict_proba(self, X: pd.DataFrame) -> np.ndarray:
51 """Predict default probability."""
52 if not self.fitted:
53 raise ValueError("Model not fitted")
54
55 X_scaled = self.scaler.transform(X)
56
57 return self.model.predict_proba(X_scaled)[:, 1]
58
59 def predict_rating(self, X: pd.DataFrame,
60 rating_bins: list = None) -> np.ndarray:
61 """
62 Convert default probability to credit rating.
63
64 Default bins (Moody's approximate):
65 AAA: < 0.02%
66 AA: 0.02-0.10%
67 A: 0.10-0.40%
68 BBB: 0.40-2.00%
69 BB: 2.00-10.00%
70 B: 10.00-30.00%
71 CCC: > 30.00%
72 """
73 if rating_bins is None:
74 rating_bins = [0, 0.0002, 0.001, 0.004, 0.02, 0.10, 0.30, 1.0]
75
76 rating_labels = ['AAA', 'AA', 'A', 'BBB', 'BB', 'B', 'CCC']
77
78 pd_scores = self.predict_proba(X)
79
80 ratings = np.digitize(pd_scores, rating_bins) - 1
81 ratings = np.clip(ratings, 0, len(rating_labels) - 1)
82
83 return np.array([rating_labels[r] for r in ratings])
84
85 def explain_prediction(self, X: pd.DataFrame, index: int = 0):
86 """
87 Explain individual prediction using SHAP.
88 """
89 X_scaled = self.scaler.transform(X)
90
91 # Calculate SHAP values
92 explainer = shap.TreeExplainer(self.model)
93 shap_values = explainer.shap_values(X_scaled)
94
95 # Get explanation for specific instance
96 explanation = pd.DataFrame({
97 'feature': self.feature_names,
98 'value': X.iloc[index].values,
99 'shap_value': shap_values[index]
100 }).sort_values('shap_value', key=abs, ascending=False)
101
102 return explanation
103
104 def evaluate(self, X: pd.DataFrame, y: np.ndarray) -> dict:
105 """Evaluate model performance."""
106 y_pred_proba = self.predict_proba(X)
107
108 # AUC
109 auc = roc_auc_score(y, y_pred_proba)
110
111 # Precision-Recall at different thresholds
112 precision, recall, thresholds = precision_recall_curve(y, y_pred_proba)
113
114 # Find threshold with best F1 score
115 f1_scores = 2 * (precision * recall) / (precision + recall + 1e-10)
116 best_idx = np.argmax(f1_scores)
117 best_threshold = thresholds[best_idx] if best_idx < len(thresholds) else thresholds[-1]
118
119 return {
120 'auc': auc,
121 'best_threshold': best_threshold,
122 'best_precision': precision[best_idx],
123 'best_recall': recall[best_idx],
124 'best_f1': f1_scores[best_idx]
125 }
126
127# Example usage with real features
128if __name__ == "__main__":
129 # Load data (synthetic example)
130 data = pd.DataFrame({
131 'debt_to_equity': np.random.uniform(0, 3, 5000),
132 'current_ratio': np.random.uniform(0.5, 3, 5000),
133 'roa': np.random.normal(0.05, 0.10, 5000),
134 'ebitda_margin': np.random.uniform(-0.2, 0.4, 5000),
135 'interest_coverage': np.random.uniform(0, 10, 5000),
136 'revenue_growth': np.random.normal(0.05, 0.20, 5000),
137 'market_cap_log': np.random.uniform(10, 15, 5000),
138 'age_years': np.random.uniform(1, 50, 5000)
139 })
140
141 # Simulate defaults (higher risk → higher default rate)
142 risk_score = (
143 2.0 * data['debt_to_equity'] -
144 1.5 * data['current_ratio'] -
145 3.0 * data['roa'] -
146 1.0 * data['ebitda_margin'] -
147 0.5 * data['interest_coverage']
148 )
149 default_prob = 1 / (1 + np.exp(-risk_score + 2))
150 defaults = np.random.binomial(1, default_prob)
151
152 # Split data
153 X_train, X_test, y_train, y_test = train_test_split(
154 data, defaults, test_size=0.2, random_state=42
155 )
156
157 # Train model
158 model = MLCreditModel(n_estimators=200, max_depth=6)
159 model.fit(X_train, y_train)
160
161 # Evaluate
162 metrics = model.evaluate(X_test, y_test)
163 print("Model Performance:")
164 print(f" AUC: {metrics['auc']:.4f}")
165 print(f" Best F1: {metrics['best_f1']:.4f}")
166 print(f" Precision: {metrics['best_precision']:.4f}")
167 print(f" Recall: {metrics['best_recall']:.4f}")
168
169 # Feature importance
170 print("\nTop 5 Features:")
171 print(model.feature_importance.head())
172
173 # Predict rating for new company
174 new_company = pd.DataFrame([{
175 'debt_to_equity': 1.2,
176 'current_ratio': 1.8,
177 'roa': 0.08,
178 'ebitda_margin': 0.15,
179 'interest_coverage': 4.5,
180 'revenue_growth': 0.12,
181 'market_cap_log': 12.5,
182 'age_years': 15
183 }])
184
185 pd_pred = model.predict_proba(new_company)[0]
186 rating = model.predict_rating(new_company)[0]
187
188 print(f"\nPrediction for new company:")
189 print(f" Default Probability: {pd_pred*100:.2f}%")
190 print(f" Credit Rating: {rating}")
191From our corporate credit system (2019-2024):
1Model AUC Precision Recall Brier Score
2──────────────────────────────────────────────────────────
3Merton 0.72 0.42 0.65 0.18
4Cox Intensity 0.76 0.48 0.68 0.15
5Logistic Reg 0.79 0.52 0.71 0.14
6Gradient Boost 0.84 0.61 0.76 0.11
7Ensemble 0.86 0.64 0.78 0.10
81Feature Importance
2─────────────────────────────────────
3Interest Coverage 0.18
4Debt/Equity 0.15
5EBITDA Margin 0.12
6Current Ratio 0.11
7ROA 0.10
8Market Cap (log) 0.08
9Revenue Growth 0.07
10Company Age 0.05
11After 5+ years of production credit modeling:
Credit risk is about predicting rare events. The best approach combines economic intuition (structural models) with data-driven learning (ML).
Master credit risk—it's essential for portfolio management and risk control.
Technical Writer
NordVarg Team is a software engineer at NordVarg specializing in high-performance financial systems and type-safe programming.
Get weekly insights on building high-performance financial systems, latest industry trends, and expert tips delivered straight to your inbox.