TL;DR – PCA reduces high-dimensional yield curves and volatility surfaces to key factors. This guide covers level-slope-curvature decomposition, volatility surface PCA, PCA-based hedging, and trading strategies with production code.
PCA captures most of the variation in a dataset with only a few factors.
Key insight: Financial data is highly correlated. PCA finds orthogonal drivers.
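Given a data matrix $X$ ($n$ samples × $p$ features):
1. Center the data: $\tilde{X} = X - \bar{x}$, where $\bar{x}$ is the vector of column means.
2. Form the sample covariance matrix: $C = \frac{1}{n-1}\tilde{X}^\top \tilde{X}$.
3. Eigendecompose it: $C = V \Lambda V^\top$, with eigenvalues $\lambda_1 \ge \lambda_2 \ge \dots \ge \lambda_p \ge 0$.
4. Project onto the first $k$ eigenvectors: $Z = \tilde{X} V_k$.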
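Properties:
- The principal components (the columns of $V$) are orthonormal.
- The scores $Z$ are uncorrelated, and the variance of the $k$-th score is $\lambda_k$.
- The fraction of total variance explained by component $k$ is $\lambda_k / \sum_{j=1}^{p} \lambda_j$.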
1import numpy as np
2import matplotlib.pyplot as plt
3from scipy.linalg import eigh
4
class PCA:
    """Principal Component Analysis via eigendecomposition of the covariance.

    Attributes:
        n_components: Number of components to keep. ``None`` means "keep
            all"; it is replaced by the feature count on the first ``fit``.
        components: Array (n_features, n_components) whose columns are the
            principal directions, ordered by decreasing eigenvalue.
        explained_variance: Eigenvalues of the retained components.
        total_variance: Sum of all eigenvalues of the covariance matrix,
            including those of discarded components.
        mean: Per-feature mean subtracted before projecting.
    """

    def __init__(self, n_components=None):
        self.n_components = n_components
        self.components = None
        self.explained_variance = None
        self.total_variance = None
        self.mean = None

    def fit(self, X):
        """
        Fit PCA to data.

        Args:
            X: Data matrix (n_samples, n_features)

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: If X is not 2-D or has fewer than two samples.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-D (n_samples, n_features); got ndim={}".format(X.ndim)
            )
        if X.shape[0] < 2:
            raise ValueError("PCA needs at least two samples to estimate a covariance")

        # Center data
        self.mean = np.mean(X, axis=0)
        X_centered = X - self.mean

        # Covariance matrix; np.cov returns a 0-d array for a single feature,
        # so promote it to 1x1 before the eigendecomposition.
        cov = np.atleast_2d(np.cov(X_centered.T))

        # Eigendecomposition (eigh returns eigenvalues in ascending order)
        eigenvalues, eigenvectors = eigh(cov)

        # Sort by eigenvalue (descending)
        idx = np.argsort(eigenvalues)[::-1]
        # Round-off can make near-zero eigenvalues slightly negative.
        eigenvalues = np.clip(eigenvalues[idx], 0.0, None)
        eigenvectors = eigenvectors[:, idx]

        # Store components
        if self.n_components is None:
            self.n_components = len(eigenvalues)

        self.components = eigenvectors[:, :self.n_components]
        self.explained_variance = eigenvalues[:self.n_components]
        # Keep the full spectrum's sum so the ratio stays meaningful after
        # truncation to n_components.
        self.total_variance = float(np.sum(eigenvalues))

        return self

    def transform(self, X):
        """Project data onto principal components."""
        X_centered = np.asarray(X, dtype=float) - self.mean
        return X_centered @ self.components

    def inverse_transform(self, X_transformed):
        """Reconstruct data from principal components."""
        return np.asarray(X_transformed, dtype=float) @ self.components.T + self.mean

    def explained_variance_ratio(self):
        """Proportion of *total* variance explained by each kept component.

        The denominator is the variance of the full data set, not only of the
        retained components, so the ratios sum to less than one whenever
        components were discarded.
        """
        total = self.total_variance
        if total is None:
            # total_variance was never recorded; fall back to the kept components.
            total = np.sum(self.explained_variance)
        if total == 0:
            # Constant data: no variance to attribute.
            return np.zeros_like(self.explained_variance, dtype=float)
        return self.explained_variance / total
57
# Example: Simple 2D data
np.random.seed(42)
# The covariance must be symmetric positive semi-definite; the off-diagonal
# term is kept below sqrt(2 * 1) so the implied correlation stays under 1.
X = np.random.multivariate_normal([0, 0], [[2, 1.2], [1.2, 1]], 100)

pca = PCA(n_components=2)
pca.fit(X)
X_transformed = pca.transform(X)

print("Explained variance ratio:", pca.explained_variance_ratio())

# Visualize
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.scatter(X[:, 0], X[:, 1], alpha=0.5)
# Anchor the arrows at the fitted mean, which is the point PCA centers on.
origin_x, origin_y = pca.mean
for pc_index, (arrow_scale, arrow_color) in enumerate([(3, 'red'), (2, 'blue')]):
    dx, dy = pca.components[:, pc_index] * arrow_scale
    plt.arrow(origin_x, origin_y, dx, dy,
              head_width=0.2, head_length=0.2,
              fc=arrow_color, ec=arrow_color, linewidth=2)
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Original Data with PC Directions')
plt.grid(True, alpha=0.3)
plt.axis('equal')

plt.subplot(1, 2, 2)
plt.scatter(X_transformed[:, 0], X_transformed[:, 1], alpha=0.5)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('Transformed Data')
plt.grid(True, alpha=0.3)
plt.axis('equal')

plt.tight_layout()
plt.show()
Empirical finding: three factors — level, slope, and curvature — explain 99%+ of yield curve movements:
class YieldCurvePCA:
    """PCA for yield curve analysis.

    Wraps a ``PCA`` model and labels its first three components as level,
    slope and curvature.
    """

    # (factor name, plot title, line color) for the first three components.
    # ``None`` keeps matplotlib's default color.
    _FACTOR_SPECS = (
        ('level', 'PC1: Level (Parallel Shift)', None),
        ('slope', 'PC2: Slope (Steepening/Flattening)', 'orange'),
        ('curvature', 'PC3: Curvature (Butterfly)', 'green'),
    )

    def __init__(self, n_components=3):
        self.pca = PCA(n_components=n_components)
        self.maturities = None

    def fit(self, yield_curves, maturities):
        """
        Fit PCA to historical yield curves.

        Args:
            yield_curves: Array of shape (n_dates, n_maturities)
            maturities: Array of maturities (in years)

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: If ``yield_curves`` is not 2-D or its column count
                does not match ``len(maturities)``.
        """
        yield_curves = np.asarray(yield_curves, dtype=float)
        maturities = np.asarray(maturities, dtype=float).ravel()
        if yield_curves.ndim != 2:
            raise ValueError("yield_curves must have shape (n_dates, n_maturities)")
        if yield_curves.shape[1] != maturities.size:
            raise ValueError(
                "yield_curves has {} columns but {} maturities were given".format(
                    yield_curves.shape[1], maturities.size
                )
            )

        # Stored as an array so later arithmetic on maturities works.
        self.maturities = maturities
        self.pca.fit(yield_curves)

        return self

    def get_factors(self):
        """Get the named factors (level, slope, curvature).

        Returns:
            Dict mapping factor name to its loading vector. When the model
            holds fewer than three components, only the available factors
            are included.
        """
        components = self.pca.components
        available = min(components.shape[1], len(self._FACTOR_SPECS))
        return {
            self._FACTOR_SPECS[k][0]: components[:, k] for k in range(available)
        }

    def decompose(self, yield_curve):
        """
        Decompose yield curve into factors.

        Args:
            yield_curve: One curve with a value per fitted maturity.

        Returns:
            factor_loadings: [level, slope, curvature]
        """
        curve = np.asarray(yield_curve, dtype=float)
        return self.pca.transform(curve.reshape(1, -1))[0]

    def reconstruct(self, factor_loadings):
        """Reconstruct yield curve from factor loadings."""
        loadings = np.asarray(factor_loadings, dtype=float)
        return self.pca.inverse_transform(loadings.reshape(1, -1))[0]

    def plot_factors(self):
        """Visualize the main factors, one stacked subplot per factor.

        Raises:
            ValueError: If the model has no components to plot.
        """
        factors = self.get_factors()
        if not factors:
            raise ValueError("no components available to plot")

        n_rows = len(factors)
        # squeeze=False keeps a 2-D axes array even for a single row.
        fig, axes = plt.subplots(
            n_rows, 1, figsize=(12, 10 * n_rows / 3), squeeze=False
        )

        for ax, (name, title, color) in zip(axes[:, 0], self._FACTOR_SPECS):
            line_style = {'linewidth': 2}
            if color is not None:
                line_style['color'] = color
            ax.plot(self.maturities, factors[name], 'o-', **line_style)
            ax.set_title(title)
            ax.set_ylabel('Loading')
            ax.grid(True, alpha=0.3)
            ax.axhline(y=0, color='black', linestyle='--', alpha=0.3)

        # Only the bottom panel carries the shared x-axis label.
        axes[-1, 0].set_xlabel('Maturity (years)')

        plt.tight_layout()
        plt.show()
69
# Example: synthetic yield-curve history built from three shock series
np.random.seed(42)
n_dates = 252
maturities = np.array([0.25, 0.5, 1, 2, 3, 5, 7, 10, 20, 30])
n_maturities = len(maturities)

# Upward-sloping base curve shared by every date
base_curve = 0.02 + 0.03 * (1 - np.exp(-maturities / 2))

# One shock per date for each driver (drawn in the same order as before)
level_shocks = np.random.normal(0, 0.005, n_dates)
slope_shocks = np.random.normal(0, 0.003, n_dates)
curve_shocks = np.random.normal(0, 0.001, n_dates)

# Broadcast each (n_dates, 1) shock column across the maturity axis instead
# of filling the matrix row by row.
level_moves = base_curve + level_shocks[:, None]            # parallel shift
slope_moves = slope_shocks[:, None] * maturities / 30       # long end moves most
hump = np.exp(-(maturities - 5)**2 / 10)                    # peaks at 5Y
curvature_moves = curve_shocks[:, None] * hump              # belly moves most

yield_curves = level_moves + slope_moves + curvature_moves

# Fit the three-factor model
yc_pca = YieldCurvePCA(n_components=3)
yc_pca.fit(yield_curves, maturities)

# Report how much variance each component carries
print("Explained variance ratio:")
variance_ratios = yc_pca.pca.explained_variance_ratio()
for pc_number, ratio in enumerate(variance_ratios, start=1):
    print(" PC{}: {:.2%}".format(pc_number, ratio))

# Show the factor loadings
yc_pca.plot_factors()

# Round-trip one curve through the factor space
sample_curve = yield_curves[100]
loadings = yc_pca.decompose(sample_curve)
reconstructed = yc_pca.reconstruct(loadings)

plt.figure(figsize=(10, 6))
for series, marker, legend_text in (
    (sample_curve, 'o-', 'Original'),
    (reconstructed, 's--', 'Reconstructed (3 PCs)'),
):
    plt.plot(maturities, series, marker, label=legend_text, linewidth=2)
plt.xlabel('Maturity (years)')
plt.ylabel('Yield')
plt.title('Yield Curve Reconstruction')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
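Hedge against level shifts using duration; the function below generalizes this by neutralizing the portfolio's exposure to each principal component: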
def compute_pca_hedge_ratios(portfolio_cashflows, hedge_instruments, yc_pca):
    """
    Compute hedge ratios to neutralize PC exposures.

    Each cashflow and hedge instrument is assigned to the nearest maturity
    on the PCA grid, converted to an approximate PV01, and projected onto
    the principal components. The hedge quantities ``h`` then solve
    ``hedge_matrix @ h = -portfolio_exposure`` in the least-squares sense.

    Args:
        portfolio_cashflows: Dict {maturity: cashflow}
        hedge_instruments: List of (maturity, price, duration) tuples
        yc_pca: Fitted YieldCurvePCA

    Returns:
        hedge_ratios: Array of hedge quantities, one per hedge instrument
    """
    grid = np.asarray(yc_pca.maturities, dtype=float)
    components = np.asarray(yc_pca.pca.components, dtype=float)
    n_points, n_factors = components.shape

    # Portfolio PV01 per grid maturity. Rough approximation: the cashflow is
    # not discounted and its maturity stands in for its duration.
    pv01_vector = np.zeros(n_points)
    for maturity, cashflow in portfolio_cashflows.items():
        # Snap to the nearest grid point (as the hedges are) so off-grid
        # cashflows are not silently dropped from the exposure.
        bucket = np.argmin(np.abs(grid - maturity))
        pv01_vector[bucket] += cashflow * maturity * 0.0001

    # PC exposures of the portfolio
    pc_exposures = components.T @ pv01_vector

    if len(hedge_instruments) == 0:
        return np.zeros(0)

    # Column j holds the PC exposures of one unit of hedge instrument j.
    hedge_matrix = np.zeros((n_factors, len(hedge_instruments)))
    for j, (mat, price, duration) in enumerate(hedge_instruments):
        # Find closest maturity in PCA
        idx = np.argmin(np.abs(grid - mat))
        # The PV01 sits on a single grid point, so its projection is that
        # point's loading row scaled by the PV01.
        hedge_matrix[:, j] = components[idx] * (price * duration * 0.0001)

    # Solve hedge_matrix @ h = -pc_exposures. Passing the matrix untransposed
    # keeps one equation per factor and one unknown per hedge instrument.
    hedge_ratios = np.linalg.lstsq(hedge_matrix, -pc_exposures, rcond=None)[0]

    return hedge_ratios
46
# Example: hedge a three-cashflow portfolio with three bonds
portfolio_cf = dict(zip((2, 5, 10), (1000, 2000, 1500)))  # maturity -> cashflow
hedges = [
    # (mat, price, duration)
    (2, 98, 1.9),
    (5, 95, 4.5),
    (10, 90, 8.5),
]

hedge_ratios = compute_pca_hedge_ratios(
    portfolio_cashflows=portfolio_cf,
    hedge_instruments=hedges,
    yc_pca=yc_pca,
)
print("Hedge ratios:", hedge_ratios)
Implied volatilities of options across strikes and maturities form a surface. PCA reduces its dimensionality:
class VolatilitySurfacePCA:
    """PCA for implied volatility surfaces.

    Surfaces are handled as flat vectors in strike-major order: the value
    for strike ``i`` and maturity ``j`` sits at index
    ``i * n_maturities + j``.
    """

    def __init__(self, n_components=5):
        self.pca = PCA(n_components=n_components)
        self.strikes = None
        self.maturities = None

    def fit(self, vol_surfaces, strikes, maturities):
        """
        Fit PCA to historical vol surfaces.

        Args:
            vol_surfaces: Array of shape (n_dates, n_strikes * n_maturities),
                or (n_dates, n_strikes, n_maturities), which is flattened
                strike-major.
            strikes: Array of strike prices
            maturities: Array of maturities

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: If the surface dimensions do not match the grid.
        """
        strikes = np.asarray(strikes, dtype=float).ravel()
        maturities = np.asarray(maturities, dtype=float).ravel()
        vol_surfaces = np.asarray(vol_surfaces, dtype=float)

        if vol_surfaces.ndim == 3:
            if vol_surfaces.shape[1:] != (strikes.size, maturities.size):
                raise ValueError(
                    "3-D vol_surfaces must have shape "
                    "(n_dates, n_strikes, n_maturities)"
                )
            vol_surfaces = vol_surfaces.reshape(vol_surfaces.shape[0], -1)

        n_points = strikes.size * maturities.size
        if vol_surfaces.ndim != 2 or vol_surfaces.shape[1] != n_points:
            raise ValueError(
                "vol_surfaces must have {} columns (n_strikes * n_maturities); "
                "got shape {}".format(n_points, vol_surfaces.shape)
            )

        self.strikes = strikes
        self.maturities = maturities
        self.pca.fit(vol_surfaces)

        return self

    def decompose(self, vol_surface):
        """Decompose surface into factor loadings.

        Accepts either a flat vector or an (n_strikes, n_maturities) grid.
        """
        surface = np.asarray(vol_surface, dtype=float)
        return self.pca.transform(surface.reshape(1, -1))[0]

    def reconstruct(self, factor_loadings):
        """Reconstruct surface from loadings as an (n_strikes, n_maturities) grid."""
        loadings = np.asarray(factor_loadings, dtype=float)
        flat = self.pca.inverse_transform(loadings.reshape(1, -1))[0]
        return flat.reshape(len(self.strikes), len(self.maturities))

    def plot_factors(self, n_factors=3):
        """Visualize main factors.

        Args:
            n_factors: Number of leading components to draw. Requests beyond
                the number of fitted components are clamped.
        """
        components = self.pca.components
        n_plots = min(n_factors, components.shape[1])

        # These do not change between panels, so compute them once.
        variance_ratios = self.pca.explained_variance_ratio()
        grid_x, grid_y = np.meshgrid(self.maturities, self.strikes)
        surface_shape = (len(self.strikes), len(self.maturities))

        fig = plt.figure(figsize=(15, 4))

        for i in range(n_plots):
            ax = fig.add_subplot(1, n_plots, i+1, projection='3d')

            factor = components[:, i].reshape(surface_shape)
            ax.plot_surface(grid_x, grid_y, factor, cmap='viridis', alpha=0.8)

            ax.set_xlabel('Maturity')
            ax.set_ylabel('Strike')
            ax.set_zlabel('Loading')
            ax.set_title('PC{} ({:.1%})'.format(i+1, variance_ratios[i]))

        plt.tight_layout()
        plt.show()
56
# Example: synthetic implied-vol history on a 9-strike x 4-maturity grid
n_dates = 100
strikes = np.linspace(80, 120, 9)
maturities = np.array([0.25, 0.5, 1, 2])

n_strikes, n_tenors = len(strikes), len(maturities)
vol_surfaces = np.zeros((n_dates, n_strikes * n_tenors))

# Date-independent pieces, computed once per grid axis:
# smile is quadratic in log-moneyness, term structure grows with sqrt(T).
smile_by_strike = np.array([0.1 * np.log(K / 100)**2 for K in strikes])
term_by_tenor = np.array([0.05 * np.sqrt(T) for T in maturities])

for t in range(n_dates):
    # Base ATM vol oscillates slowly through time
    atm_vol = 0.2 + 0.05 * np.sin(t / 10)

    # One noise draw per grid point, in strike-major order
    noise = np.random.normal(0, 0.01, size=(n_strikes, n_tenors))

    surface = atm_vol + smile_by_strike[:, None] + term_by_tenor[None, :] + noise
    # Flatten strike-major: index = i * n_maturities + j
    vol_surfaces[t] = surface.ravel()

# Fit the five-factor model
vol_pca = VolatilitySurfacePCA(n_components=5)
vol_pca.fit(vol_surfaces, strikes, maturities)

print("Explained variance:")
surface_ratios = vol_pca.pca.explained_variance_ratio()
for pc_number, ratio in enumerate(surface_ratios, start=1):
    print(" PC{}: {:.2%}".format(pc_number, ratio))

# Show the three leading factors
vol_pca.plot_factors(n_factors=3)
Identify mispricings using PCA residuals:
def find_relative_value_opportunities(yield_curves, yc_pca, threshold=2.0,
                                      min_std=1e-12):
    """
    Find yield curve points trading rich/cheap relative to PCA model.

    Each curve is projected onto the fitted factors and rebuilt; the
    residual (actual yield minus model yield) is scaled by the standard
    deviation of that curve's residuals across maturities.

    Args:
        yield_curves: Recent yield curves, one per row
        yc_pca: Fitted PCA model
        threshold: Number of standard deviations
        min_std: Residual standard deviation at or below which a curve is
            treated as fully explained by the model and skipped. This avoids
            dividing by zero and flagging pure round-off as a signal.

    Returns:
        opportunities: List of dicts with keys 'date' (row index of the
            curve), 'maturity', 'z_score' and 'signal'. A yield above the
            model value (positive z) is 'cheap'; a yield below it is 'rich'.
    """
    opportunities = []

    for date, curve in enumerate(yield_curves):
        curve = np.asarray(curve, dtype=float)

        # Reconstruct from PCA
        loadings = yc_pca.decompose(curve)
        reconstructed = yc_pca.reconstruct(loadings)

        # Residuals
        residuals = curve - reconstructed

        # Standardize; the negated comparison also skips a NaN std.
        std = np.std(residuals)
        if not std > min_std:
            continue
        z_scores = residuals / std

        # Find outliers
        for mat, z in zip(yc_pca.maturities, z_scores):
            if abs(z) > threshold:
                opportunities.append({
                    'date': date,
                    'maturity': mat,
                    'z_score': z,
                    # Higher yield than the model implies a lower price,
                    # i.e. the point is cheap.
                    'signal': 'cheap' if z > 0 else 'rich'
                })

    return opportunities
38
# Scan the 50 most recent curves for rich/cheap points
recent_curves = yield_curves[-50:]
opportunities = find_relative_value_opportunities(recent_curves, yc_pca)

print("Relative value opportunities:")
# Show at most the first five hits
report_fields = ('date', 'maturity', 'signal', 'z_score')
for opp in opportunities[:5]:
    values = [opp[field] for field in report_fields]
    print(" Date {}, {}Y: {} (z={:.2f})".format(*values))
PCA is essential for dimensionality reduction in finance. Use it for yield curve analysis, volatility surface modeling, and risk factor hedging. Always validate factor stability and monitor explained variance over time.
Technical Writer
The NordVarg Team is a group of software engineers at NordVarg specializing in high-performance financial systems and type-safe programming.
Get weekly insights on building high-performance financial systems, latest industry trends, and expert tips delivered straight to your inbox.