Python Code for Portfolio Optimization
Chapter 4 – Financial Data: Time Series Modeling
Daniel P. Palomar (2025). Portfolio Optimization: Theory and Application. Cambridge University Press.
Last update: March 13, 2025
Contributors:
Packages
The following packages are used in the examples:
# Basic finance and data manipulation
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime
from typing import Tuple, Literal

# Book data (pip install "git+https://github.com/dppalomar/pob.git#subdirectory=python")
from pob_python import SP500_stocks_2015to2020, SP500_index_2015to2020

# Time series models
from statsmodels.tsa.arima.model import ARIMA
from pykalman import KalmanFilter
#from statsmodels.tsa.statespace.kalman_filter import KalmanFilter
from arch import arch_model
import pymc as pm  # for stochastic volatility modeling

# Plotting
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")
Temporal structure
Example of a synthetic Gaussian AR(1) time series:
# Specify an AR(1) model with given coefficients and parameters
np.random.seed(42)
T = 300
mu = 0.01
phi = -0.9
sigma = 0.2

# Simulate one path
x = np.zeros(T)
x[0] = mu
for t in range(1, T):
    x[t] = mu + phi * (x[t-1] - mu) + sigma * np.random.normal()

# Plot
plt.figure(figsize=(10, 5))
plt.plot(range(1, T+1), x, linewidth=0.8, color='blue')
plt.title('AR(1) time series')
plt.tight_layout()
plt.show()
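As a quick sanity check (a sketch, not part of the book's original code), we can fit an AR(1) model to the simulated path and verify that the estimates roughly recover mu = 0.01, phi = -0.9, and sigma^2 = 0.04:

# Sanity check (a sketch): recover the AR(1) parameters from the simulated path
fit = ARIMA(x, order=(1, 0, 0)).fit()
print(fit.params)  # [const ~ mu, ar.L1 ~ phi, sigma2 ~ sigma^2]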
Mean modeling
Moving average (MA)
Forecasting with moving average:
# Using the book's data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
# # Download S&P 500 data for 2020
# sp500 = yf.download('^GSPC', start='2020-01-01', end='2020-12-31')
# y = np.log(sp500['Adj Close'])

# Log-returns
x = y.diff().dropna()

# Create DataFrames to store all forecasts
y_all = pd.DataFrame(y)
y_all.columns = ['true']
x_all = pd.DataFrame(x)
x_all.columns = ['true']

# MA(20) on log-prices y
window = 20
y_forecast = y.rolling(window=window).mean()
x_forecast = y_forecast - y
x_forecast.name = y_forecast.name = 'MA(20) on log-prices'
y_all = pd.concat([y_all, y_forecast.shift()], axis=1)
x_all = pd.concat([x_all, x_forecast.shift().iloc[1:]], axis=1)

# MA(20) on log-returns x
x_forecast = x.rolling(window=window).mean()
y_forecast = (y + x_forecast).shift()
y_forecast.name = x_forecast.name = 'MA(20) on log-returns'
x_all = pd.concat([x_all, x_forecast.shift()], axis=1)
y_all = pd.concat([y_all, y_forecast], axis=1)

# Calculate forecast errors
y_error = y_all.subtract(y_all['true'], axis=0)
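A note on alignment: the .shift() calls are what make these genuine out-of-sample forecasts, since the value stored at day t then uses only information up to day t-1. A toy illustration (a sketch, not part of the book's code):

# Toy illustration of why .shift() avoids look-ahead
s = pd.Series([1.0, 2.0, 3.0, 4.0])
print(s.rolling(window=2).mean())          # value at day t averages days t-1 and t
print(s.rolling(window=2).mean().shift())  # shifted: usable as a forecast for day t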
# Plotting
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 12), sharex=True)

# Log-price time series
y_all.plot(ax=ax1, linewidth=1)
ax1.set_title('Log-price time series')
ax1.set_ylabel('')
ax1.set_xlabel('')

# Log-return time series
x_all.plot(ax=ax2, linewidth=1)
ax2.set_title('Log-return time series')
ax2.set_ylabel('')
ax2.set_xlabel('')

# Forecast error time series
y_error.plot(ax=ax3, linewidth=1)
ax3.set_title('Forecast error time series')
ax3.set_ylabel('')
ax3.set_xlabel('')

plt.tight_layout()
plt.show()
# Table of MSE
df_MSE = pd.DataFrame(y_error.drop('true', axis=1).pow(2).mean(skipna=True)).T
print("Mean Squared Error:")
print(df_MSE)
Mean Squared Error:
   MA(20) on log-prices  MA(20) on log-returns
0              0.004032               0.000725
ARMA
Forecasting with ARMA models:
# Get data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
x = y.diff().dropna()

# Create DataFrames to store all forecasts
y_all = pd.DataFrame(y)
y_all.columns = ['true']
x_all = pd.DataFrame(x)
x_all.columns = ['true']

# Split data for training and testing
T = len(x)
T_trn = round(0.2 * T)   # 20% of the sample for initial training
T_tst = T - T_trn        # remaining 80% for out-of-sample testing
# Function to forecast with ARIMA models
def forecast_arima(
    data: pd.Series,
    order: Tuple[int, int, int],
    name: str,
    T_trn: int,
    refit_step: int = 10,
    window: Literal['rolling', 'expanding'] = 'rolling'
) -> pd.Series:
    """
    Generate forecasts using an ARIMA model with periodic refitting.

    Parameters
    ----------
    data : pd.Series
        Time series data to forecast.
    order : Tuple[int, int, int]
        ARIMA model order (p, d, q).
    name : str
        Name for the output forecast series.
    T_trn : int
        Size of the initial training set.
    refit_step : int, default=10
        Number of steps between complete model refits.
    window : {'rolling', 'expanding'}, default='rolling'
        Type of window to use for forecasting.

    Returns
    -------
    pd.Series
        Series containing the forecasted values.
    """
    T_tst = len(data) - T_trn
    forecasts = []
    model_fit = None
    last_refit = -refit_step  # force an initial fit

    for i in range(T_tst):
        current_idx = T_trn + i

        # Determine whether we need to refit the model
        should_refit = (i - last_refit >= refit_step) or (model_fit is None)

        if should_refit:
            # Complete refit of the model
            if window == 'expanding':
                model = ARIMA(data.values[:current_idx], order=order)
            elif window == 'rolling':
                model = ARIMA(data.values[current_idx-T_trn:current_idx], order=order)
            else:
                raise ValueError('window must be "expanding" or "rolling"')
            model_fit = model.fit(method='innovations_mle')
            last_refit = i
        else:
            # Update the existing model with the new observation
            new_obs = data.values[current_idx-1:current_idx]
            model_fit = model_fit.append(new_obs)

        # Make the one-step-ahead forecast
        pred = model_fit.forecast(steps=1)
        forecasts.append(pred[0])

    # Assemble a Series aligned with the original index
    forecast_series = pd.Series([np.nan] * T_trn + forecasts,
                                index=data.index, name=name)
    return forecast_series
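The helper supports both window types. For instance, an expanding-window AR(1) forecast (a sketch with a hypothetical series name; the examples below all use the rolling window) would be:

# Hypothetical usage with an expanding window (the examples below use 'rolling')
x_forecast_exp = forecast_arima(x, (1, 0, 0), 'AR(1) expanding', T_trn,
                                refit_step=10, window='expanding')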
# i.i.d. model (ARIMA(0,0,0))
x_forecast = forecast_arima(x, (0, 0, 0), 'i.i.d.', T_trn)
y_forecast = (x_forecast + y.shift()).rename(x_forecast.name)
x_all = pd.concat([x_all, x_forecast], axis=1)
y_all = pd.concat([y_all, y_forecast], axis=1)

# AR(1) model
x_forecast = forecast_arima(x, (1, 0, 0), 'AR(1)', T_trn)
y_forecast = (x_forecast + y.shift()).rename(x_forecast.name)
x_all = pd.concat([x_all, x_forecast], axis=1)
y_all = pd.concat([y_all, y_forecast], axis=1)

# MA(1) model
x_forecast = forecast_arima(x, (0, 0, 1), 'MA(1)', T_trn)
y_forecast = (x_forecast + y.shift()).rename(x_forecast.name)
x_all = pd.concat([x_all, x_forecast], axis=1)
y_all = pd.concat([y_all, y_forecast], axis=1)

# ARMA(1,1) model
x_forecast = forecast_arima(x, (1, 0, 1), 'ARMA(1,1)', T_trn)
y_forecast = (x_forecast + y.shift()).rename(x_forecast.name)
x_all = pd.concat([x_all, x_forecast], axis=1)
y_all = pd.concat([y_all, y_forecast], axis=1)

# Calculate forecast errors
y_error = y_all.subtract(y_all['true'], axis=0)
# Plotting
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 12), sharex=True)

# Log-price time series
y_all.plot(ax=ax1, linewidth=1)
ax1.set_title('Log-price time series')
ax1.set_ylabel('')
ax1.set_xlabel('')

# Log-return time series
x_all.plot(ax=ax2, linewidth=1)
ax2.set_title('Log-return time series')
ax2.set_ylabel('')
ax2.set_xlabel('')

# Forecast error time series
y_error.plot(ax=ax3, linewidth=1)
ax3.set_title('Forecast error time series')
ax3.set_ylabel('')
ax3.set_xlabel('')

plt.tight_layout()
plt.show()
# Table of MSE
df_MSE = pd.DataFrame(y_error.drop('true', axis=1).pow(2).mean(skipna=True)).T
print("Mean Squared Error:")
print(df_MSE)
Mean Squared Error:
     i.i.d.     AR(1)     MA(1)  ARMA(1,1)
0  0.000754  0.000788  0.000808   0.000881
Kalman
# Get data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
x = y.diff().dropna()
mean_x = x.mean()
var_x = x.var()

# Create DataFrames to store all forecasts
y_all = pd.DataFrame(y)
y_all.columns = ['true']
x_all = pd.DataFrame(x)
x_all.columns = ['true']
# Kalman filter on log-returns x (random-walk state)
kf = KalmanFilter(
    transition_matrices=[1],
    observation_matrices=[1],
    initial_state_mean=mean_x,
    initial_state_covariance=var_x,
    observation_covariance=var_x,
    transition_covariance=var_x
)
state_means, _ = kf.filter(x.values)
x_filtering = pd.Series(state_means.flatten(), index=x.index)
y_forecast = (y + x_filtering).shift()
y_forecast.name = x_filtering.name = 'Kalman on log-returns (dynamic)'
x_all = pd.concat([x_all, x_filtering.shift().iloc[1:]], axis=1)
y_all = pd.concat([y_all, y_forecast], axis=1)
# Kalman filter on log-prices y (static mu): slowly varying level
kf = KalmanFilter(
    transition_matrices=[1],
    observation_matrices=[1],
    initial_state_mean=y.iloc[0],
    initial_state_covariance=var_x,
    observation_covariance=var_x,
    transition_covariance=var_x/10
)
state_means, _ = kf.filter(y.values)
y_filtering = pd.Series(state_means.flatten(), index=y.index)
x_forecast = y_filtering - y
x_forecast.name = y_filtering.name = 'Kalman on log-prices (static)'
y_all = pd.concat([y_all, y_filtering.shift()], axis=1)
x_all = pd.concat([x_all, x_forecast.shift().iloc[1:]], axis=1)
# Kalman filter on log-prices y (dynamic mu)
# Two-state local linear trend model: the state holds the level (log-price)
# and the slope (drift), analogous to position and velocity
kf = KalmanFilter(
    transition_matrices=[[1, 1], [0, 1]],
    observation_matrices=[[1, 0]],
    initial_state_mean=[y.iloc[0], mean_x],
    initial_state_covariance=np.eye(2) * var_x,
    observation_covariance=var_x,
    transition_covariance=np.array([[var_x, 0], [0, var_x]])
)
state_means, _ = kf.filter(y.values)
y_filtering = pd.Series(state_means[:, 0], index=y.index)
x_forecast = y_filtering - y
x_forecast.name = y_filtering.name = 'Kalman on log-prices (dynamic)'
y_all = pd.concat([y_all, y_filtering.shift()], axis=1)
x_all = pd.concat([x_all, x_forecast.shift().iloc[1:]], axis=1)
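If one also wants a genuine forecast for the first day beyond the sample, pykalman's filter_update can run a pure time update by treating the next observation as missing (a sketch under that assumption, not part of the book's code):

# One-step-ahead prediction past the end of the sample (a sketch)
state_means, state_covs = kf.filter(y.values)
pred_mean, pred_cov = kf.filter_update(state_means[-1], state_covs[-1],
                                       observation=None)  # time update only
print(pred_mean[0])  # predicted next log-price (level + slope)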
# Calculate forecast errors
y_error = y_all.subtract(y_all['true'], axis=0)
# Plotting
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 12), sharex=True)

# Log-price time series
y_all.plot(ax=ax1, linewidth=1)
ax1.set_title('Log-price time series')
ax1.set_ylabel('')
ax1.set_xlabel('')

# Log-return time series
x_all.plot(ax=ax2, linewidth=1)
ax2.set_title('Log-return time series')
ax2.set_ylabel('')
ax2.set_xlabel('')

# Forecast error time series
y_error.plot(ax=ax3, linewidth=1)
ax3.set_title('Forecast error time series')
ax3.set_ylabel('')
ax3.set_xlabel('')

plt.tight_layout()
plt.show()
# Table of MSE
df_MSE = pd.DataFrame(y_error.drop('true', axis=1).pow(2).mean(skipna=True)).T
print("Mean Squared Error:")
print(df_MSE)
Mean Squared Error:
   Kalman on log-returns (dynamic)  Kalman on log-prices (static)  Kalman on log-prices (dynamic)
0                         0.001044                       0.000976                        0.000549
Volatility/Variance modeling
Moving average (MA)
Volatility envelope with moving averages:
# Get data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
x = y.diff().dropna()
x_all = pd.DataFrame(abs(x))
x_all.columns = ['absolute residuals']
# MA(5) on squared returns x^2
vol_forecast = np.sqrt(x.pow(2).rolling(window=5).mean())
vol_forecast.name = 'MA(5) volatility'
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)

# MA(10) on squared returns x^2
vol_forecast = np.sqrt(x.pow(2).rolling(window=10).mean())
vol_forecast.name = 'MA(10) volatility'
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)

# MA(20) on squared returns x^2
vol_forecast = np.sqrt(x.pow(2).rolling(window=20).mean())
vol_forecast.name = 'MA(20) volatility'
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)
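The three blocks above differ only in the window length; an equivalent compact version (a sketch, to be run instead of the blocks above, not in addition) loops over the windows:

# Equivalent compact version: loop over the MA window lengths
for window in [5, 10, 20]:
    vol_forecast = np.sqrt(x.pow(2).rolling(window=window).mean())
    vol_forecast.name = f'MA({window}) volatility'
    x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)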
# Plot
plt.figure(figsize=(10, 5))
for column in x_all.columns:
    if column == 'absolute residuals':
        plt.plot(x_all.index, x_all[column], linewidth=0.5,
                 color='darkslategray', label=column)
    else:
        plt.plot(x_all.index, x_all[column], linewidth=1.2, label=column)
plt.title('Residual time series and envelope')
plt.legend()
plt.tight_layout()
plt.show()
EWMA
Volatility envelope with EWMA:
# Get data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
x = y.diff().dropna()
x_all = pd.DataFrame(abs(x))
x_all.columns = ['absolute residuals']
# EWMA(0.33) volatility (alpha = 2/(n+1) = 0.33, i.e., n = 5)
vol_forecast = np.sqrt(x.pow(2).ewm(alpha=0.33).mean())
vol_forecast.name = 'EWMA(0.33) volatility'
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)

# EWMA(0.18) volatility (alpha = 2/(n+1) = 0.18, i.e., n = 10)
vol_forecast = np.sqrt(x.pow(2).ewm(alpha=0.18).mean())
vol_forecast.name = 'EWMA(0.18) volatility'
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)

# EWMA(0.10) volatility (alpha = 2/(n+1) = 0.10, i.e., n = 20)
vol_forecast = np.sqrt(x.pow(2).ewm(alpha=0.10).mean())
vol_forecast.name = 'EWMA(0.10) volatility'
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)
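The alpha values above come from the common equivalence alpha = 2/(n+1) between an EWMA smoothing factor and an MA window length n, so the three EWMAs match the MA(5), MA(10), and MA(20) envelopes of the previous section. A quick check (a sketch):

# Map MA window lengths to the EWMA alphas used above via alpha = 2/(n+1)
for n in [5, 10, 20]:
    print(f"n = {n:2d}  ->  alpha = {2/(n + 1):.2f}")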
# Plot
plt.figure(figsize=(10, 5))
for column in x_all.columns:
    if column == 'absolute residuals':
        plt.plot(x_all.index, x_all[column], linewidth=0.5,
                 color='darkslategray', label=column)
    else:
        plt.plot(x_all.index, x_all[column], linewidth=1.2, label=column)
plt.title('Residual time series and envelope')
plt.legend()
plt.tight_layout()
plt.show()
GARCH
Volatility envelope with GARCH models:
# Get data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
x = y.diff().dropna()
x_all = pd.DataFrame(abs(x))
x_all.columns = ['absolute residuals']

# Split data for training and testing
T = len(x)
T_trn = round(0.2 * T)   # 20% of the sample for initial training
T_tst = T - T_trn
# Function to forecast with GARCH models
def forecast_garch(
    data: pd.Series,
    p: int,
    q: int,
    name: str,
    T_trn: int,
    refit_step: int = 1,
    window: Literal['rolling', 'expanding'] = 'rolling'
) -> pd.Series:
    """
    Generate volatility forecasts using a GARCH model with periodic refitting.

    Parameters
    ----------
    data : pd.Series
        Time series data whose volatility is forecast.
    p : int
        Order of the ARCH terms (lagged squared residuals).
    q : int
        Order of the GARCH terms (lagged conditional variances).
    name : str
        Name for the output forecast series.
    T_trn : int
        Size of the initial training set.
    refit_step : int, default=1
        Number of steps between complete model refits.
    window : {'rolling', 'expanding'}, default='rolling'
        Type of window to use for forecasting.

    Returns
    -------
    pd.Series
        Series containing the forecasted volatility values.
    """
    T_tst = len(data) - T_trn
    forecasts = []
    model_fit = None
    last_refit = -refit_step  # force an initial fit

    for i in range(T_tst):
        current_idx = T_trn + i

        # Determine whether we need to refit the model
        should_refit = (i - last_refit >= refit_step) or (model_fit is None)

        if should_refit:
            # Complete refit of the model
            if window == 'expanding':
                train_data = data.iloc[:current_idx]
            elif window == 'rolling':
                train_data = data.iloc[current_idx-T_trn:current_idx]
            else:
                raise ValueError('window must be "expanding" or "rolling"')

            # Create and fit the GARCH model
            model = arch_model(train_data, vol='GARCH', p=p, q=q,
                               mean='Zero', rescale=True)
            model_fit = model.fit(disp='off')
            last_refit = i

        # Make the one-step-ahead forecast
        pred = model_fit.forecast(horizon=1)

        # Extract the volatility forecast (standard deviation), undoing the rescaling
        vol_forecast = np.sqrt(pred.variance.iloc[-1, 0]) / model_fit.scale
        forecasts.append(vol_forecast)

    # Assemble a Series aligned with the original index
    forecast_series = pd.Series([np.nan] * T_trn + forecasts,
                                index=data.index, name=name)
    return forecast_series
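Before running the rolling forecasts, it can be instructive to fit a single GARCH(1,1) on the full sample and inspect the estimated parameters (a sketch, not part of the book's code):

# One-off GARCH(1,1) fit on the full sample to inspect omega, alpha, beta
am = arch_model(x, vol='GARCH', p=1, q=1, mean='Zero', rescale=True)
print(am.fit(disp='off').params)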
# ARCH(5) model
vol_forecast = forecast_garch(x, 5, 0, 'ARCH(5)', T_trn)
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)

# GARCH(1,1) model
vol_forecast = forecast_garch(x, 1, 1, 'GARCH(1,1)', T_trn)
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)

# GARCH(5,1) model
vol_forecast = forecast_garch(x, 5, 1, 'GARCH(5,1)', T_trn)
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)
# Plot
plt.figure(figsize=(10, 5))
for column in x_all.columns:
    if column == 'absolute residuals':
        plt.plot(x_all.index, x_all[column], linewidth=0.5,
                 color='darkslategray', label=column)
    else:
        plt.plot(x_all.index, x_all[column], linewidth=1.2, label=column)
plt.title('Residual time series and envelope')
plt.legend()
plt.tight_layout()
plt.show()
Stochastic volatility (SV)
Volatility envelope with SV modeling via MCMC (the packages pyflux and pymc are popular, though notoriously tricky to install):
# Get data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
x = y.diff().dropna()

# Create DataFrame for results
env_all = pd.DataFrame(abs(x))
env_all.columns = ['absolute residuals']
# Stochastic volatility model using PyMC
with pm.Model() as sv_model:
    # Priors
    step_size = pm.Exponential('step_size', 10)
    volatility = pm.GaussianRandomWalk('volatility', sigma=step_size,
                                       init_dist=pm.Normal.dist(0, 100),
                                       shape=len(x))
    nu = pm.Exponential('nu', 0.1)

    # Likelihood using the actual returns (x.values)
    returns = pm.StudentT('returns', nu=nu, sigma=np.exp(volatility),
                          observed=x.values)

    # Sample from the posterior
    trace = pm.sample(tune=1000, draws=1000, chains=4, cores=4, random_seed=42)

# Posterior mean of the volatility (standard deviation) path
volatility_mean = np.exp(trace.posterior['volatility'].mean(('chain', 'draw')))
vol_forecast = pd.Series(volatility_mean, index=x.index, name='SV').shift(1)

# Combine datasets with proper alignment
env_all = pd.concat([env_all, vol_forecast], axis=1).dropna()
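An optional convergence check (a sketch; it assumes arviz, which PyMC installs as a dependency): r_hat values close to 1 indicate that the four chains mixed well.

# Optional MCMC diagnostics (assumes the arviz package)
import arviz as az
print(az.summary(trace, var_names=['step_size', 'nu']))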
# Plot
plt.figure(figsize=(10, 5))
plt.plot(env_all.index, env_all['absolute residuals'], linewidth=0.5,
         color='darkslategray', label='absolute residuals')
plt.plot(env_all.index, env_all['SV'], linewidth=1.2, label='SV')
plt.title('Residual time series and envelope')
plt.legend()
plt.tight_layout()
plt.show()
SV via Kalman
Volatility envelope with SV modeling via Kalman filter:
# Get data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
x = y.diff().dropna()

# Prepare data for the Kalman filter
log_x2 = np.log(x**2 + 1e-8)  # add a small constant to avoid log(0)
abs_x = abs(x)

# Create DataFrames for results
env_forecast = pd.DataFrame(abs_x)
env_smoothing = pd.DataFrame(abs_x)
env_forecast.columns = env_smoothing.columns = ['absolute residuals']
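The offsets -1.27 and pi^2/2 used below are the mean and variance of log(eps^2) for standard Gaussian eps (a log-chi-squared(1) variable), which is what turns the SV model into a linear state-space model for log(x^2). A quick Monte Carlo check (a sketch, not part of the book's code):

# Monte Carlo check of the log-chi-squared(1) constants used below
rng = np.random.default_rng(0)
z = np.log(rng.standard_normal(1_000_000)**2)
print(z.mean(), z.var())  # approx. -1.27 and pi^2/2 = 4.93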
# Kalman filter: random walk plus Gaussian noise model
kf = KalmanFilter(
    transition_matrices=np.array([[1.0]]),            # B = "identity"
    observation_matrices=np.array([[1.0]]),           # Z = "identity"
    observation_offsets=np.array([-1.27]),            # A = matrix(-1.27)
    transition_offsets=np.array([0.0]),               # U = "zero"
    observation_covariance=np.array([[np.pi**2/2]]),  # R = matrix(pi^2/2)
    transition_covariance=np.array([[0.1]]),          # initial value for Q (to be estimated)
    initial_state_mean=np.array([np.log(x.var())]),   # x0 = matrix(log(var(x)))
    initial_state_covariance=np.array([[1e-3]]),      # V0 = matrix(1e-3)
    em_vars=['transition_covariance']                 # estimate Q (var_state)
)

# Run EM to estimate transition_covariance (var_state)
kf = kf.em(log_x2.values, n_iter=5)

# Get filtered and smoothed states
filtered_state_means, _ = kf.filter(log_x2.values.reshape(-1, 1))
smoothed_state_means, _ = kf.smooth(log_x2.values.reshape(-1, 1))

# Convert to Series with the original index
h_forecast = pd.Series(filtered_state_means.flatten(), index=x.index)
h_smoothing = pd.Series(smoothed_state_means.flatten(), index=x.index)
h_forecast.name = h_smoothing.name = 'SV - random walk'

# Transform back to the volatility scale (exp(h/2))
vol_forecast = np.exp(h_forecast/2)
vol_smoothing = np.exp(h_smoothing/2)

# Add to the result DataFrames
env_forecast = pd.concat([env_forecast, vol_forecast], axis=1)
env_smoothing = pd.concat([env_smoothing, vol_smoothing], axis=1)
# Kalman filter: AR(1) plus Gaussian noise model
# First run EM to estimate the AR(1) parameters (phi, gamma, var_state)
kf_ar1 = KalmanFilter(
    transition_matrices=np.array([[0.9]]),            # B = phi (initial value before EM)
    observation_matrices=np.array([[1.0]]),           # Z = "identity"
    observation_offsets=np.array([-1.27]),            # A = matrix(-1.27)
    transition_offsets=np.array([0.1]),               # U = gamma (initial value before EM)
    observation_covariance=np.array([[np.pi**2/2]]),  # R = matrix(pi^2/2)
    transition_covariance=np.array([[0.1]]),          # Q (initial value before EM)
    initial_state_mean=np.array([np.log(x.var())]),   # x0 = matrix(log(var(x)))
    initial_state_covariance=np.array([[1e-3]]),      # V0 = matrix(1e-3)
    em_vars=['transition_matrices',    # estimate phi
             'transition_offsets',     # estimate gamma
             'transition_covariance']  # estimate var_state
)

# Fit to estimate the AR(1) parameters
kf_ar1 = kf_ar1.em(log_x2.values.reshape(-1, 1), n_iter=5)

# Get filtered and smoothed states
filtered_state_means_ar1, _ = kf_ar1.filter(log_x2.values.reshape(-1, 1))
smoothed_state_means_ar1, _ = kf_ar1.smooth(log_x2.values.reshape(-1, 1))

# Convert to Series with the original index
h_forecast_ar1 = pd.Series(filtered_state_means_ar1.flatten(), index=x.index)
h_smoothing_ar1 = pd.Series(smoothed_state_means_ar1.flatten(), index=x.index)
h_forecast_ar1.name = h_smoothing_ar1.name = 'SV - AR(1)'

# Transform back to the volatility scale (exp(h/2))
vol_forecast_ar1 = np.exp(h_forecast_ar1/2)
vol_smoothing_ar1 = np.exp(h_smoothing_ar1/2)

# Add to the result DataFrames
env_forecast = pd.concat([env_forecast, vol_forecast_ar1], axis=1)
env_smoothing = pd.concat([env_smoothing, vol_smoothing_ar1], axis=1)

# Print the estimated parameters
phi = kf_ar1.transition_matrices[0, 0]
gamma = kf_ar1.transition_offsets[0]
var_state = kf_ar1.transition_covariance[0, 0]
print(f"Estimated AR(1) parameters: phi={phi:.4f}, gamma={gamma:.4f}, var_state={var_state:.4f}")
Estimated AR(1) parameters: phi=1.0070, gamma=0.0516, var_state=0.1187
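Note that the EM estimate of phi comes out slightly above one, i.e., a (mildly) nonstationary AR(1); with only five EM iterations and a single year of data this is not unusual, but the filtered estimates should be interpreted with some caution.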
# Plot
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10), sharex=True)

# Forecast envelope in the first subplot
ax1.plot(env_forecast.index, env_forecast['absolute residuals'],
         linewidth=0.5, color='darkslategray', label='absolute residuals')
ax1.plot(env_forecast.index, env_forecast['SV - random walk'],
         linewidth=1.2, label='SV - random walk')
if 'SV - AR(1)' in env_forecast.columns:
    ax1.plot(env_forecast.index, env_forecast['SV - AR(1)'],
             linewidth=1.2, label='SV - AR(1)')
ax1.set_title('Residual time series and envelope forecast')
ax1.legend()

# Smoothing envelope in the second subplot
ax2.plot(env_smoothing.index, env_smoothing['absolute residuals'],
         linewidth=0.5, color='darkslategray', label='absolute residuals')
ax2.plot(env_smoothing.index, env_smoothing['SV - random walk'],
         linewidth=1.2, label='SV - random walk')
if 'SV - AR(1)' in env_smoothing.columns:
    ax2.plot(env_smoothing.index, env_smoothing['SV - AR(1)'],
             linewidth=1.2, label='SV - AR(1)')
ax2.set_title('Residual time series and envelope smoothing')
ax2.legend()

# Format the x-axis with dates (mdates is already imported above)
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax2.xaxis.set_major_locator(mdates.MonthLocator(interval=2))

plt.tight_layout()
plt.show()