Python Code for Portfolio Optimization
Chapter 4 – Financial Data: Time Series Modeling
Daniel P. Palomar (2025). Portfolio Optimization: Theory and Application. Cambridge University Press.
Last update: March 13, 2025
Contributors:
Packages
The following packages are used in the examples:
# Basic finance and data manipulation
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime
from typing import Tuple, Literal

# Book data (pip install "git+https://github.com/dppalomar/pob.git#subdirectory=python")
from pob_python import SP500_stocks_2015to2020, SP500_index_2015to2020

# Time series models
from statsmodels.tsa.arima.model import ARIMA
from pykalman import KalmanFilter
#from statsmodels.tsa.statespace.kalman_filter import KalmanFilter
from arch import arch_model
import pymc as pm  # for stochastic volatility modeling

# Plotting
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")
Temporal structure
Example of a synthetic Gaussian AR(1) time series:
# Specify an AR(1) model with given coefficients and parameters
np.random.seed(42)
T = 300
mu = 0.01
phi = -0.9
sigma = 0.2

# Simulate one path
x = np.zeros(T)
x[0] = mu
for t in range(1, T):
    x[t] = mu + phi * (x[t-1] - mu) + sigma * np.random.normal()

# Plot
plt.figure(figsize=(10, 5))
plt.plot(range(1, T+1), x, linewidth=0.8, color='blue')
plt.title('AR(1) time series')
plt.tight_layout()
plt.show()
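As a quick sanity check (a sketch, not part of the book's original code), we can fit an AR(1) model to the simulated path and verify that the estimates roughly recover mu = 0.01, phi = -0.9, and sigma^2 = 0.04:

# Sanity check (a sketch): recover the AR(1) parameters from the simulated path
fit = ARIMA(x, order=(1, 0, 0)).fit()
print(fit.params)  # [const ~ mu, ar.L1 ~ phi, sigma2 ~ sigma^2]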
Mean modeling
Moving average (MA)
Forecasting with moving average:
# Using the book's data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
# # Download S&P 500 data for 2020
# sp500 = yf.download('^GSPC', start='2020-01-01', end='2020-12-31')
# y = np.log(sp500['Adj Close'])

# Log-returns
x = y.diff().dropna()

# Create DataFrames to store all forecasts
y_all = pd.DataFrame(y)
y_all.columns = ['true']
x_all = pd.DataFrame(x)
x_all.columns = ['true']

# MA(20) on log-prices y
window = 20
y_forecast = y.rolling(window=window).mean()
x_forecast = y_forecast - y
x_forecast.name = y_forecast.name = 'MA(20) on log-prices'
y_all = pd.concat([y_all, y_forecast.shift()], axis=1)
x_all = pd.concat([x_all, x_forecast.shift().iloc[1:]], axis=1)

# MA(20) on log-returns x
x_forecast = x.rolling(window=window).mean()
y_forecast = (y + x_forecast).shift()
y_forecast.name = x_forecast.name = 'MA(20) on log-returns'
x_all = pd.concat([x_all, x_forecast.shift()], axis=1)
y_all = pd.concat([y_all, y_forecast], axis=1)

# Calculate forecast errors
y_error = y_all.subtract(y_all['true'], axis=0)
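A note on alignment: the .shift() calls are what make these genuine out-of-sample forecasts, since the value stored at day t then uses only information up to day t-1. A toy illustration (a sketch, not part of the book's code):

# Toy illustration of why .shift() avoids look-ahead
s = pd.Series([1.0, 2.0, 3.0, 4.0])
print(s.rolling(window=2).mean())          # value at day t averages days t-1 and t
print(s.rolling(window=2).mean().shift())  # shifted: usable as a forecast for day t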
# Plotting
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 12), sharex=True)

# Log-price time series
y_all.plot(ax=ax1, linewidth=1)
ax1.set_title('Log-price time series')
ax1.set_ylabel('')
ax1.set_xlabel('')

# Log-return time series
x_all.plot(ax=ax2, linewidth=1)
ax2.set_title('Log-return time series')
ax2.set_ylabel('')
ax2.set_xlabel('')

# Forecast error time series
y_error.plot(ax=ax3, linewidth=1)
ax3.set_title('Forecast error time series')
ax3.set_ylabel('')
ax3.set_xlabel('')

plt.tight_layout()
plt.show()
# Table of MSE
df_MSE = pd.DataFrame(y_error.drop('true', axis=1).pow(2).mean(skipna=True)).T
print("Mean Squared Error:")
print(df_MSE)
Mean Squared Error:
   MA(20) on log-prices  MA(20) on log-returns
0              0.004032               0.000725
ARMA
Forecasting with ARMA models:
# Get data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
x = y.diff().dropna()

# Create DataFrames to store all forecasts
y_all = pd.DataFrame(y)
y_all.columns = ['true']
x_all = pd.DataFrame(x)
x_all.columns = ['true']

# Split data for training and testing
T = len(x)
T_trn = round(0.2 * T)   # 20% of the sample for initial training
T_tst = T - T_trn        # remaining 80% for out-of-sample testing
# Function to forecast with ARIMA models
def forecast_arima(
    data: pd.Series,
    order: Tuple[int, int, int],
    name: str,
    T_trn: int,
    refit_step: int = 10,
    window: Literal['rolling', 'expanding'] = 'rolling'
) -> pd.Series:
    """
    Generate forecasts using an ARIMA model with periodic refitting.

    Parameters
    ----------
    data : pd.Series
        Time series data to forecast.
    order : Tuple[int, int, int]
        ARIMA model order (p, d, q).
    name : str
        Name for the output forecast series.
    T_trn : int
        Size of the initial training set.
    refit_step : int, default=10
        Number of steps between complete model refits.
    window : {'rolling', 'expanding'}, default='rolling'
        Type of window to use for forecasting.

    Returns
    -------
    pd.Series
        Series containing the forecasted values.
    """
    T_tst = len(data) - T_trn
    forecasts = []
    model_fit = None
    last_refit = -refit_step  # force an initial fit

    for i in range(T_tst):
        current_idx = T_trn + i

        # Determine whether we need to refit the model
        should_refit = (i - last_refit >= refit_step) or (model_fit is None)

        if should_refit:
            # Complete refit of the model
            if window == 'expanding':
                model = ARIMA(data.values[:current_idx], order=order)
            elif window == 'rolling':
                model = ARIMA(data.values[current_idx-T_trn:current_idx], order=order)
            else:
                raise ValueError('window must be "expanding" or "rolling"')
            model_fit = model.fit(method='innovations_mle')
            last_refit = i
        else:
            # Update the existing model with the new observation
            new_obs = data.values[current_idx-1:current_idx]
            model_fit = model_fit.append(new_obs)

        # Make the one-step-ahead forecast
        pred = model_fit.forecast(steps=1)
        forecasts.append(pred[0])

    # Assemble a Series aligned with the original index
    forecast_series = pd.Series([np.nan] * T_trn + forecasts,
                                index=data.index, name=name)
    return forecast_series
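The helper supports both window types. For instance, an expanding-window AR(1) forecast (a sketch with a hypothetical series name; the examples below all use the rolling window) would be:

# Hypothetical usage with an expanding window (the examples below use 'rolling')
x_forecast_exp = forecast_arima(x, (1, 0, 0), 'AR(1) expanding', T_trn,
                                refit_step=10, window='expanding')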
# i.i.d. model (ARIMA(0,0,0))
x_forecast = forecast_arima(x, (0, 0, 0), 'i.i.d.', T_trn)
y_forecast = (x_forecast + y.shift()).rename(x_forecast.name)
x_all = pd.concat([x_all, x_forecast], axis=1)
y_all = pd.concat([y_all, y_forecast], axis=1)

# AR(1) model
x_forecast = forecast_arima(x, (1, 0, 0), 'AR(1)', T_trn)
y_forecast = (x_forecast + y.shift()).rename(x_forecast.name)
x_all = pd.concat([x_all, x_forecast], axis=1)
y_all = pd.concat([y_all, y_forecast], axis=1)

# MA(1) model
x_forecast = forecast_arima(x, (0, 0, 1), 'MA(1)', T_trn)
y_forecast = (x_forecast + y.shift()).rename(x_forecast.name)
x_all = pd.concat([x_all, x_forecast], axis=1)
y_all = pd.concat([y_all, y_forecast], axis=1)

# ARMA(1,1) model
x_forecast = forecast_arima(x, (1, 0, 1), 'ARMA(1,1)', T_trn)
y_forecast = (x_forecast + y.shift()).rename(x_forecast.name)
x_all = pd.concat([x_all, x_forecast], axis=1)
y_all = pd.concat([y_all, y_forecast], axis=1)

# Calculate forecast errors
y_error = y_all.subtract(y_all['true'], axis=0)
# Plotting
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 12), sharex=True)

# Log-price time series
y_all.plot(ax=ax1, linewidth=1)
ax1.set_title('Log-price time series')
ax1.set_ylabel('')
ax1.set_xlabel('')

# Log-return time series
x_all.plot(ax=ax2, linewidth=1)
ax2.set_title('Log-return time series')
ax2.set_ylabel('')
ax2.set_xlabel('')

# Forecast error time series
y_error.plot(ax=ax3, linewidth=1)
ax3.set_title('Forecast error time series')
ax3.set_ylabel('')
ax3.set_xlabel('')

plt.tight_layout()
plt.show()
# Table of MSE
df_MSE = pd.DataFrame(y_error.drop('true', axis=1).pow(2).mean(skipna=True)).T
print("Mean Squared Error:")
print(df_MSE)
Mean Squared Error:
     i.i.d.     AR(1)     MA(1)  ARMA(1,1)
0  0.000754  0.000788  0.000808   0.000881
Kalman
# Get data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
x = y.diff().dropna()
mean_x = x.mean()
var_x = x.var()

# Create DataFrames to store all forecasts
y_all = pd.DataFrame(y)
y_all.columns = ['true']
x_all = pd.DataFrame(x)
x_all.columns = ['true']
# Kalman filter on log-returns x (random-walk state)
kf = KalmanFilter(
    transition_matrices=[1],
    observation_matrices=[1],
    initial_state_mean=mean_x,
    initial_state_covariance=var_x,
    observation_covariance=var_x,
    transition_covariance=var_x
)
state_means, _ = kf.filter(x.values)
x_filtering = pd.Series(state_means.flatten(), index=x.index)
y_forecast = (y + x_filtering).shift()
y_forecast.name = x_filtering.name = 'Kalman on log-returns (dynamic)'
x_all = pd.concat([x_all, x_filtering.shift().iloc[1:]], axis=1)
y_all = pd.concat([y_all, y_forecast], axis=1)
# Kalman filter on log-prices y (static mu): slowly varying level
kf = KalmanFilter(
    transition_matrices=[1],
    observation_matrices=[1],
    initial_state_mean=y.iloc[0],
    initial_state_covariance=var_x,
    observation_covariance=var_x,
    transition_covariance=var_x/10
)
state_means, _ = kf.filter(y.values)
y_filtering = pd.Series(state_means.flatten(), index=y.index)
x_forecast = y_filtering - y
x_forecast.name = y_filtering.name = 'Kalman on log-prices (static)'
y_all = pd.concat([y_all, y_filtering.shift()], axis=1)
x_all = pd.concat([x_all, x_forecast.shift().iloc[1:]], axis=1)
# Kalman filter on log-prices y (dynamic mu)
# Two-state local linear trend model: the state holds the level (log-price)
# and the slope (drift), analogous to position and velocity
kf = KalmanFilter(
    transition_matrices=[[1, 1], [0, 1]],
    observation_matrices=[[1, 0]],
    initial_state_mean=[y.iloc[0], mean_x],
    initial_state_covariance=np.eye(2) * var_x,
    observation_covariance=var_x,
    transition_covariance=np.array([[var_x, 0], [0, var_x]])
)
state_means, _ = kf.filter(y.values)
y_filtering = pd.Series(state_means[:, 0], index=y.index)
x_forecast = y_filtering - y
x_forecast.name = y_filtering.name = 'Kalman on log-prices (dynamic)'
y_all = pd.concat([y_all, y_filtering.shift()], axis=1)
x_all = pd.concat([x_all, x_forecast.shift().iloc[1:]], axis=1)
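If one also wants a genuine forecast for the first day beyond the sample, pykalman's filter_update can run a pure time update by treating the next observation as missing (a sketch under that assumption, not part of the book's code):

# One-step-ahead prediction past the end of the sample (a sketch)
state_means, state_covs = kf.filter(y.values)
pred_mean, pred_cov = kf.filter_update(state_means[-1], state_covs[-1],
                                       observation=None)  # time update only
print(pred_mean[0])  # predicted next log-price (level + slope)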
# Calculate forecast errors
y_error = y_all.subtract(y_all['true'], axis=0)
# Plotting
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 12), sharex=True)

# Log-price time series
y_all.plot(ax=ax1, linewidth=1)
ax1.set_title('Log-price time series')
ax1.set_ylabel('')
ax1.set_xlabel('')

# Log-return time series
x_all.plot(ax=ax2, linewidth=1)
ax2.set_title('Log-return time series')
ax2.set_ylabel('')
ax2.set_xlabel('')

# Forecast error time series
y_error.plot(ax=ax3, linewidth=1)
ax3.set_title('Forecast error time series')
ax3.set_ylabel('')
ax3.set_xlabel('')

plt.tight_layout()
plt.show()
# Table of MSE
df_MSE = pd.DataFrame(y_error.drop('true', axis=1).pow(2).mean(skipna=True)).T
print("Mean Squared Error:")
print(df_MSE)
Mean Squared Error:
   Kalman on log-returns (dynamic)  Kalman on log-prices (static)  Kalman on log-prices (dynamic)
0                         0.001044                       0.000976                        0.000549
Volatility/Variance modeling
Moving average (MA)
Volatility envelope with moving averages:
# Get data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
x = y.diff().dropna()
x_all = pd.DataFrame(abs(x))
x_all.columns = ['absolute residuals']
# MA(5) on squared returns x^2
vol_forecast = np.sqrt(x.pow(2).rolling(window=5).mean())
vol_forecast.name = 'MA(5) volatility'
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)

# MA(10) on squared returns x^2
vol_forecast = np.sqrt(x.pow(2).rolling(window=10).mean())
vol_forecast.name = 'MA(10) volatility'
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)

# MA(20) on squared returns x^2
vol_forecast = np.sqrt(x.pow(2).rolling(window=20).mean())
vol_forecast.name = 'MA(20) volatility'
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)
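The three blocks above differ only in the window length; an equivalent compact version (a sketch, to be run instead of the blocks above, not in addition) loops over the windows:

# Equivalent compact version: loop over the MA window lengths
for window in [5, 10, 20]:
    vol_forecast = np.sqrt(x.pow(2).rolling(window=window).mean())
    vol_forecast.name = f'MA({window}) volatility'
    x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)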
# Plot
plt.figure(figsize=(10, 5))
for column in x_all.columns:
    if column == 'absolute residuals':
        plt.plot(x_all.index, x_all[column], linewidth=0.5,
                 color='darkslategray', label=column)
    else:
        plt.plot(x_all.index, x_all[column], linewidth=1.2, label=column)
plt.title('Residual time series and envelope')
plt.legend()
plt.tight_layout()
plt.show()
EWMA
Volatility envelope with EWMA:
# Get data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
x = y.diff().dropna()
x_all = pd.DataFrame(abs(x))
x_all.columns = ['absolute residuals']
# EWMA(0.33) volatility (alpha = 2/(n+1) = 0.33, i.e., n = 5)
vol_forecast = np.sqrt(x.pow(2).ewm(alpha=0.33).mean())
vol_forecast.name = 'EWMA(0.33) volatility'
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)

# EWMA(0.18) volatility (alpha = 2/(n+1) = 0.18, i.e., n = 10)
vol_forecast = np.sqrt(x.pow(2).ewm(alpha=0.18).mean())
vol_forecast.name = 'EWMA(0.18) volatility'
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)

# EWMA(0.10) volatility (alpha = 2/(n+1) = 0.10, i.e., n = 20)
vol_forecast = np.sqrt(x.pow(2).ewm(alpha=0.10).mean())
vol_forecast.name = 'EWMA(0.10) volatility'
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)
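The alpha values above come from the common equivalence alpha = 2/(n+1) between an EWMA smoothing factor and an MA window length n, so the three EWMAs match the MA(5), MA(10), and MA(20) envelopes of the previous section. A quick check (a sketch):

# Map MA window lengths to the EWMA alphas used above via alpha = 2/(n+1)
for n in [5, 10, 20]:
    print(f"n = {n:2d}  ->  alpha = {2/(n + 1):.2f}")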
# Plot
plt.figure(figsize=(10, 5))
for column in x_all.columns:
    if column == 'absolute residuals':
        plt.plot(x_all.index, x_all[column], linewidth=0.5,
                 color='darkslategray', label=column)
    else:
        plt.plot(x_all.index, x_all[column], linewidth=1.2, label=column)
plt.title('Residual time series and envelope')
plt.legend()
plt.tight_layout()
plt.show()
GARCH
Volatility envelope with GARCH models:
# Get data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
x = y.diff().dropna()
x_all = pd.DataFrame(abs(x))
x_all.columns = ['absolute residuals']

# Split data for training and testing
T = len(x)
T_trn = round(0.2 * T)   # 20% of the sample for initial training
T_tst = T - T_trn
# Function to forecast with GARCH models
def forecast_garch(
    data: pd.Series,
    p: int,
    q: int,
    name: str,
    T_trn: int,
    refit_step: int = 1,
    window: Literal['rolling', 'expanding'] = 'rolling'
) -> pd.Series:
    """
    Generate volatility forecasts using a GARCH model with periodic refitting.

    Parameters
    ----------
    data : pd.Series
        Time series data whose volatility is forecast.
    p : int
        Order of the ARCH terms (lagged squared residuals).
    q : int
        Order of the GARCH terms (lagged conditional variances).
    name : str
        Name for the output forecast series.
    T_trn : int
        Size of the initial training set.
    refit_step : int, default=1
        Number of steps between complete model refits.
    window : {'rolling', 'expanding'}, default='rolling'
        Type of window to use for forecasting.

    Returns
    -------
    pd.Series
        Series containing the forecasted volatility values.
    """
    T_tst = len(data) - T_trn
    forecasts = []
    model_fit = None
    last_refit = -refit_step  # force an initial fit

    for i in range(T_tst):
        current_idx = T_trn + i

        # Determine whether we need to refit the model
        should_refit = (i - last_refit >= refit_step) or (model_fit is None)

        if should_refit:
            # Complete refit of the model
            if window == 'expanding':
                train_data = data.iloc[:current_idx]
            elif window == 'rolling':
                train_data = data.iloc[current_idx-T_trn:current_idx]
            else:
                raise ValueError('window must be "expanding" or "rolling"')

            # Create and fit the GARCH model
            model = arch_model(train_data, vol='GARCH', p=p, q=q,
                               mean='Zero', rescale=True)
            model_fit = model.fit(disp='off')
            last_refit = i

        # Make the one-step-ahead forecast
        pred = model_fit.forecast(horizon=1)

        # Extract the volatility forecast (standard deviation), undoing the rescaling
        vol_forecast = np.sqrt(pred.variance.iloc[-1, 0]) / model_fit.scale
        forecasts.append(vol_forecast)

    # Assemble a Series aligned with the original index
    forecast_series = pd.Series([np.nan] * T_trn + forecasts,
                                index=data.index, name=name)
    return forecast_series
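Before running the rolling forecasts, it can be instructive to fit a single GARCH(1,1) on the full sample and inspect the estimated parameters (a sketch, not part of the book's code):

# One-off GARCH(1,1) fit on the full sample to inspect omega, alpha, beta
am = arch_model(x, vol='GARCH', p=1, q=1, mean='Zero', rescale=True)
print(am.fit(disp='off').params)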
# ARCH(5) model
vol_forecast = forecast_garch(x, 5, 0, 'ARCH(5)', T_trn)
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)

# GARCH(1,1) model
vol_forecast = forecast_garch(x, 1, 1, 'GARCH(1,1)', T_trn)
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)

# GARCH(5,1) model
vol_forecast = forecast_garch(x, 5, 1, 'GARCH(5,1)', T_trn)
x_all = pd.concat([x_all, vol_forecast.shift().iloc[1:]], axis=1)
# Plot
plt.figure(figsize=(10, 5))
for column in x_all.columns:
    if column == 'absolute residuals':
        plt.plot(x_all.index, x_all[column], linewidth=0.5,
                 color='darkslategray', label=column)
    else:
        plt.plot(x_all.index, x_all[column], linewidth=1.2, label=column)
plt.title('Residual time series and envelope')
plt.legend()
plt.tight_layout()
plt.show()
Stochastic volatility (SV)
Volatility envelope with SV modeling via MCMC (the packages pyflux and pymc are popular, though notoriously tricky to install):
# Get data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
x = y.diff().dropna()

# Create DataFrame for results
env_all = pd.DataFrame(abs(x))
env_all.columns = ['absolute residuals']
# Stochastic volatility model using PyMC
with pm.Model() as sv_model:
    # Priors
    step_size = pm.Exponential('step_size', 10)
    volatility = pm.GaussianRandomWalk('volatility', sigma=step_size,
                                       init_dist=pm.Normal.dist(0, 100),
                                       shape=len(x))
    nu = pm.Exponential('nu', 0.1)

    # Likelihood using the actual returns (x.values)
    returns = pm.StudentT('returns', nu=nu, sigma=np.exp(volatility),
                          observed=x.values)

    # Sample from the posterior
    trace = pm.sample(tune=1000, draws=1000, chains=4, cores=4, random_seed=42)

# Posterior mean of the volatility (standard deviation) path
volatility_mean = np.exp(trace.posterior['volatility'].mean(('chain', 'draw')))
vol_forecast = pd.Series(volatility_mean, index=x.index, name='SV').shift(1)

# Combine datasets with proper alignment
env_all = pd.concat([env_all, vol_forecast], axis=1).dropna()
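An optional convergence check (a sketch; it assumes arviz, which PyMC installs as a dependency): r_hat values close to 1 indicate that the four chains mixed well.

# Optional MCMC diagnostics (assumes the arviz package)
import arviz as az
print(az.summary(trace, var_names=['step_size', 'nu']))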
# Plot
plt.figure(figsize=(10, 5))
plt.plot(env_all.index, env_all['absolute residuals'], linewidth=0.5,
         color='darkslategray', label='absolute residuals')
plt.plot(env_all.index, env_all['SV'], linewidth=1.2, label='SV')
plt.title('Residual time series and envelope')
plt.legend()
plt.tight_layout()
plt.show()
SV via Kalman
Volatility envelope with SV modeling via Kalman filter:
# Get data
y = np.log(SP500_index_2015to2020.loc['2020']['SP500_INDEX'])
x = y.diff().dropna()

# Prepare data for the Kalman filter
log_x2 = np.log(x**2 + 1e-8)  # add a small constant to avoid log(0)
abs_x = abs(x)

# Create DataFrames for results
env_forecast = pd.DataFrame(abs_x)
env_smoothing = pd.DataFrame(abs_x)
env_forecast.columns = env_smoothing.columns = ['absolute residuals']
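The offsets -1.27 and pi^2/2 used below are the mean and variance of log(eps^2) for standard Gaussian eps (a log-chi-squared(1) variable), which is what turns the SV model into a linear state-space model for log(x^2). A quick Monte Carlo check (a sketch, not part of the book's code):

# Monte Carlo check of the log-chi-squared(1) constants used below
rng = np.random.default_rng(0)
z = np.log(rng.standard_normal(1_000_000)**2)
print(z.mean(), z.var())  # approx. -1.27 and pi^2/2 = 4.93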
# Kalman filter: random walk plus Gaussian noise model
kf = KalmanFilter(
    transition_matrices=np.array([[1.0]]),            # B = "identity"
    observation_matrices=np.array([[1.0]]),           # Z = "identity"
    observation_offsets=np.array([-1.27]),            # A = matrix(-1.27)
    transition_offsets=np.array([0.0]),               # U = "zero"
    observation_covariance=np.array([[np.pi**2/2]]),  # R = matrix(pi^2/2)
    transition_covariance=np.array([[0.1]]),          # initial value for Q (to be estimated)
    initial_state_mean=np.array([np.log(x.var())]),   # x0 = matrix(log(var(x)))
    initial_state_covariance=np.array([[1e-3]]),      # V0 = matrix(1e-3)
    em_vars=['transition_covariance']                 # estimate Q (var_state)
)

# Run EM to estimate transition_covariance (var_state)
kf = kf.em(log_x2.values, n_iter=5)

# Get filtered and smoothed states
filtered_state_means, _ = kf.filter(log_x2.values.reshape(-1, 1))
smoothed_state_means, _ = kf.smooth(log_x2.values.reshape(-1, 1))

# Convert to Series with the original index
h_forecast = pd.Series(filtered_state_means.flatten(), index=x.index)
h_smoothing = pd.Series(smoothed_state_means.flatten(), index=x.index)
h_forecast.name = h_smoothing.name = 'SV - random walk'

# Transform back to the volatility scale (exp(h/2))
vol_forecast = np.exp(h_forecast/2)
vol_smoothing = np.exp(h_smoothing/2)

# Add to the result DataFrames
env_forecast = pd.concat([env_forecast, vol_forecast], axis=1)
env_smoothing = pd.concat([env_smoothing, vol_smoothing], axis=1)
# Kalman filter: AR(1) plus Gaussian noise model
# First run EM to estimate the AR(1) parameters (phi, gamma, var_state)
kf_ar1 = KalmanFilter(
    transition_matrices=np.array([[0.9]]),            # B = phi (initial value before EM)
    observation_matrices=np.array([[1.0]]),           # Z = "identity"
    observation_offsets=np.array([-1.27]),            # A = matrix(-1.27)
    transition_offsets=np.array([0.1]),               # U = gamma (initial value before EM)
    observation_covariance=np.array([[np.pi**2/2]]),  # R = matrix(pi^2/2)
    transition_covariance=np.array([[0.1]]),          # Q (initial value before EM)
    initial_state_mean=np.array([np.log(x.var())]),   # x0 = matrix(log(var(x)))
    initial_state_covariance=np.array([[1e-3]]),      # V0 = matrix(1e-3)
    em_vars=['transition_matrices',    # estimate phi
             'transition_offsets',     # estimate gamma
             'transition_covariance']  # estimate var_state
)

# Fit to estimate the AR(1) parameters
kf_ar1 = kf_ar1.em(log_x2.values.reshape(-1, 1), n_iter=5)

# Get filtered and smoothed states
filtered_state_means_ar1, _ = kf_ar1.filter(log_x2.values.reshape(-1, 1))
smoothed_state_means_ar1, _ = kf_ar1.smooth(log_x2.values.reshape(-1, 1))

# Convert to Series with the original index
h_forecast_ar1 = pd.Series(filtered_state_means_ar1.flatten(), index=x.index)
h_smoothing_ar1 = pd.Series(smoothed_state_means_ar1.flatten(), index=x.index)
h_forecast_ar1.name = h_smoothing_ar1.name = 'SV - AR(1)'

# Transform back to the volatility scale (exp(h/2))
vol_forecast_ar1 = np.exp(h_forecast_ar1/2)
vol_smoothing_ar1 = np.exp(h_smoothing_ar1/2)

# Add to the result DataFrames
env_forecast = pd.concat([env_forecast, vol_forecast_ar1], axis=1)
env_smoothing = pd.concat([env_smoothing, vol_smoothing_ar1], axis=1)

# Print the estimated parameters
phi = kf_ar1.transition_matrices[0, 0]
gamma = kf_ar1.transition_offsets[0]
var_state = kf_ar1.transition_covariance[0, 0]
print(f"Estimated AR(1) parameters: phi={phi:.4f}, gamma={gamma:.4f}, var_state={var_state:.4f}")
Estimated AR(1) parameters: phi=1.0070, gamma=0.0516, var_state=0.1187
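Note that the EM estimate of phi comes out slightly above one, i.e., a (mildly) nonstationary AR(1); with only five EM iterations and a single year of data this is not unusual, but the filtered estimates should be interpreted with some caution.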
# Plot
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10), sharex=True)

# Forecast envelope in the first subplot
ax1.plot(env_forecast.index, env_forecast['absolute residuals'],
         linewidth=0.5, color='darkslategray', label='absolute residuals')
ax1.plot(env_forecast.index, env_forecast['SV - random walk'],
         linewidth=1.2, label='SV - random walk')
if 'SV - AR(1)' in env_forecast.columns:
    ax1.plot(env_forecast.index, env_forecast['SV - AR(1)'],
             linewidth=1.2, label='SV - AR(1)')
ax1.set_title('Residual time series and envelope forecast')
ax1.legend()

# Smoothing envelope in the second subplot
ax2.plot(env_smoothing.index, env_smoothing['absolute residuals'],
         linewidth=0.5, color='darkslategray', label='absolute residuals')
ax2.plot(env_smoothing.index, env_smoothing['SV - random walk'],
         linewidth=1.2, label='SV - random walk')
if 'SV - AR(1)' in env_smoothing.columns:
    ax2.plot(env_smoothing.index, env_smoothing['SV - AR(1)'],
             linewidth=1.2, label='SV - AR(1)')
ax2.set_title('Residual time series and envelope smoothing')
ax2.legend()

# Format the x-axis with dates (mdates is already imported above)
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax2.xaxis.set_major_locator(mdates.MonthLocator(interval=2))

plt.tight_layout()
plt.show()