Python Code for Portfolio Optimization
Chapter 7 β Modern Portfolio Theory
Daniel P. Palomar (2025). Portfolio Optimization: Theory and Application. Cambridge University Press.
Last update: February 19, 2026
Contributors:
LibrariesΒΆ
The following libraries are used in the examples:
# Core numerical computing import numpy as np import pandas as pd from typing import Dict, Tuple # For financial data import yfinance as yf # Loading financial data import empyrical as emp # Performance metrics # Book data (pip install "git+https://github.com/dppalomar/pob.git#subdirectory=python") from pob_python import SP500_stocks_2015to2020, SP500_index_2015to2020 # Plotting import matplotlib.pyplot as plt import matplotlib.dates as mdates import seaborn as sns sns.set_theme(style="darkgrid") import plotly.io as pio pio.renderers.default = "notebook" pio.templates.default = "plotly_white" # Optimization import cvxpy as cp # interface for convex optimization solvers from scipy.optimize import minimize, LinearConstraint # for optimization from scipy.linalg import cholesky
$\def\bm#1{\boldsymbol{#1}}$ $\def\textm#1{\textsf{#1}}$ $\def\T{{\mkern-2mu\raise-1mu\mathsf{T}}}$ $\def\R{\mathbb{R}}$ $\def\E{\rm I\kern-.2em E}$ $\def\w{\bm{w}}$ $\def\bmu{\bm{\mu}}$ $\def\bSigma{\bm{\Sigma}}$
Helper FunctionsΒΆ
# Create wrapper to backtest multiple backtests (currently not available in skfolio, but soon will be) import time from datetime import timedelta def batch_cross_val_predict(optimizers, X, cv, **kwargs) -> list: """Run cross_val_predict on multiple optimizers with timing information""" bt_list = [] n_splits = cv.get_n_splits(X) total_start_time = time.time() print(f"\nπ Starting batch backtesting with {len(optimizers)} portfolio designs...") print(f"π Cross-validation: {n_splits} splits") print("=" * 70) for i, optim in enumerate(optimizers, 1): # Handle missing or None portfolio_params (fixes the original error) if hasattr(optim, 'portfolio_params') and optim.portfolio_params is not None: name = optim.portfolio_params.get('name', f'Portfolio {i}') else: name = f'Portfolio {i} ({type(optim).__name__})' print(f"\n[{i}/{len(optimizers)}] π Backtesting '{name}'...") # Start timing for this specific backtest start_time = time.time() try: bt = cross_val_predict( optim, X, cv=cv, portfolio_params=getattr(optim, 'portfolio_params', {}), **kwargs ) # Calculate elapsed time elapsed_time = time.time() - start_time elapsed_str = str(timedelta(seconds=int(elapsed_time))) bt_list.append(bt) print(f"β Completed '{name}' in {elapsed_str}") except Exception as e: elapsed_time = time.time() - start_time elapsed_str = str(timedelta(seconds=int(elapsed_time))) print(f"β Failed '{name}' after {elapsed_str}: {str(e)}") raise # Re-raise the exception to maintain original behavior # Calculate total elapsed time total_elapsed = time.time() - total_start_time total_elapsed_str = str(timedelta(seconds=int(total_elapsed))) print("=" * 70) print(f"π Batch backtesting completed!") print(f"β±οΈ Total time: {total_elapsed_str}") print(f"π Successfully processed {len(bt_list)} portfolios") return bt_list
# Create wrapper to plot portfolio compositions as a barplot def plot_composition_sidebyside(population, asset_names=None): """ Plot portfolio compositions side-by-side for a Population of portfolios. Parameters ---------- population : Population A skfolio Population object containing multiple portfolios. asset_names : list of str, optional List of asset names in the desired order. If provided, all assets will be displayed (with zero weights for missing assets). Returns ------- fig : plotly.graph_objects.Figure The plotly figure object. """ import plotly.graph_objects as go # Determine the asset order if asset_names is None: asset_names = population[0].composition.index.tolist() fig = go.Figure() # Add a bar trace for each portfolio for portfolio in population: df = portfolio.composition # shape: (n_assets, 1) # Reindex to include all assets in the specified order, filling missing with 0 weights = df.iloc[:, 0].reindex(asset_names, fill_value=0) fig.add_trace(go.Bar( x=asset_names, # asset names in specified order y=weights, # weights (0 for missing assets) name=portfolio.name # legend label (portfolio name) )) fig.update_layout( title="Portfolio Compositions", xaxis_title="Assets", yaxis_title="Weight", barmode='group', # bars side-by-side (not stacked) xaxis={'categoryorder': 'array', # enforce asset order 'categoryarray': asset_names} ) return fig
# Create a class to be able to specify portfolios direclty via a CVXPY code from skfolio.optimization import ConvexOptimization class Portfolio_via_CVXPY(ConvexOptimization): """Portfolio based on custom CVXPY function.""" def __init__(self, cvxpy_fun=None, **kwargs): super().__init__(**kwargs) self.cvxpy_fun = cvxpy_fun def fit(self, X, y=None): """Fit the custom optimization problem using the provided function.""" if self.cvxpy_fun is None: raise ValueError("cvxpy_fun must be provided at initialization") # Call the custom function to compute weights weights = self.cvxpy_fun(X) # Store results self.weights_ = np.asarray(weights) return self
Mean-Variance Portfolio (MVP)ΒΆ
Vanilla MVPΒΆ
We explore the mean-variance portfolio (MVP), $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \w^\T\bmu - \frac{\lambda}{2}\w^\T\bSigma\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}, \end{array} $$ with different values of the hyper-parameter $\lambda$.
# Get data stock_prices = SP500_stocks_2015to2020[ ["AAPL", "AMZN", "AMD", "GM", "GOOGL", "MGM", "MSFT", "QCOM", "TSCO", "UPS"] ].loc["2019":] T = stock_prices.shape[0] T_trn = round(0.70*T) stock_prices.head()
| AAPL | AMZN | AMD | GM | GOOGL | MGM | MSFT | QCOM | TSCO | UPS | |
|---|---|---|---|---|---|---|---|---|---|---|
| Date | ||||||||||
| 2019-01-02 | 38.629 | 1539.13 | 18.83 | 31.8934 | 1054.68 | 24.5435 | 98.8602 | 54.2052 | 80.3520 | 91.445 |
| 2019-01-03 | 34.781 | 1500.28 | 17.05 | 30.5755 | 1025.47 | 24.0660 | 95.2233 | 52.5998 | 78.8374 | 88.849 |
| 2019-01-04 | 36.266 | 1575.39 | 19.00 | 31.5995 | 1078.07 | 25.1086 | 99.6521 | 53.4497 | 80.4594 | 91.944 |
| 2019-01-07 | 36.185 | 1629.51 | 20.57 | 32.5760 | 1075.92 | 25.8296 | 99.7792 | 53.2986 | 81.6418 | 91.633 |
| 2019-01-08 | 36.875 | 1656.58 | 20.75 | 33.0026 | 1085.37 | 26.5116 | 100.5027 | 52.8359 | 81.5930 | 91.643 |
# # Define portfolios # def EWP(X: pd.DataFrame) -> np.ndarray: N = X.shape[1] return np.repeat(1/N, N) def design_MVP(mu: np.ndarray, Sigma: np.ndarray, lmd: float = 1) -> np.ndarray: N = len(Sigma) w = cp.Variable(N) prob = cp.Problem(cp.Maximize(mu @ w - (lmd/2) * cp.quad_form(w, Sigma)), [cp.sum(w) == 1, w >= 0]) prob.solve() return w.value def MVP_lmd1(X_lin: pd.DataFrame) -> np.ndarray: X_log = np.log(1 + X_lin) mu = X_log.mean().values Sigma = X_log.cov().values return design_MVP(mu, Sigma, lmd=1) def MVP_lmd4(X_lin: pd.DataFrame) -> np.ndarray: X_log = np.log(1 + X_lin) mu = X_log.mean().values Sigma = X_log.cov().values return design_MVP(mu, Sigma, lmd=4) def MVP_lmd16(X_lin: pd.DataFrame) -> np.ndarray: X_log = np.log(1 + X_lin) mu = X_log.mean().values Sigma = X_log.cov().values return design_MVP(mu, Sigma, lmd=16) def MVP_lmd64(X_lin: pd.DataFrame) -> np.ndarray: X_log = np.log(1 + X_lin) mu = X_log.mean().values Sigma = X_log.cov().values return design_MVP(mu, Sigma, lmd=64)
from skfolio import Population from skfolio.model_selection import cross_val_predict, WalkForward from skfolio.preprocessing import prices_to_returns # Wrap each portfolio within skfolio portf_EWP = Portfolio_via_CVXPY( cvxpy_fun=EWP, portfolio_params=dict(name="1/N") ) portf_MVP_lmd1 = Portfolio_via_CVXPY( cvxpy_fun=MVP_lmd1, portfolio_params=dict(name="MVP (lmd = 1)") ) portf_MVP_lmd4 = Portfolio_via_CVXPY( cvxpy_fun=MVP_lmd4, portfolio_params=dict(name="MVP (lmd = 4)") ) portf_MVP_lmd16 = Portfolio_via_CVXPY( cvxpy_fun=MVP_lmd16, portfolio_params=dict(name="MVP (lmd = 16)") ) portf_MVP_lmd64 = Portfolio_via_CVXPY( cvxpy_fun=MVP_lmd64, portfolio_params=dict(name="MVP (lmd = 64)") ) all_formulations = [ portf_EWP, portf_MVP_lmd1, portf_MVP_lmd4, portf_MVP_lmd16, portf_MVP_lmd64, ] n_portfolios = len(all_formulations) # WalkForward backtest bt_list = batch_cross_val_predict( all_formulations, X=prices_to_returns(stock_prices), #or: stock_prices.pct_change(fill_method=None) cv=WalkForward(test_size=30, train_size=T_trn, purged_size=1), ) bt_population = Population(bt_list)
π Starting batch backtesting with 5 portfolio designs... π Cross-validation: 4 splits ====================================================================== [1/5] π Backtesting '1/N'... β Completed '1/N' in 0:00:00 [2/5] π Backtesting 'MVP (lmd = 1)'... β Completed 'MVP (lmd = 1)' in 0:00:00 [3/5] π Backtesting 'MVP (lmd = 4)'... β Completed 'MVP (lmd = 4)' in 0:00:00 [4/5] π Backtesting 'MVP (lmd = 16)'... β Completed 'MVP (lmd = 16)' in 0:00:00 [5/5] π Backtesting 'MVP (lmd = 64)'... β Completed 'MVP (lmd = 64)' in 0:00:00 ====================================================================== π Batch backtesting completed! β±οΈ Total time: 0:00:00 π Successfully processed 5 portfolios
# Extract initial portfolio composition of each portfolio model initial_portfolios = Population([ setattr(mpp[0], 'name', mpp.name) or mpp[0] for mpp in bt_population ])
# Plot side-by-side barplot plot_composition_sidebyside(initial_portfolios, asset_names=stock_prices.columns)
# Summary bt_population.summary().loc[[ 'Annualized Sharpe Ratio', 'CVaR at 95%', 'MAX Drawdown', 'Annualized Mean', 'Annualized Standard Deviation' ]].T
| Annualized Sharpe Ratio | CVaR at 95% | MAX Drawdown | Annualized Mean | Annualized Standard Deviation | |
|---|---|---|---|---|---|
| 1/N | 3.98 | 4.37% | 9.11% | 142.32% | 35.74% |
| MVP (lmd = 1) | 2.70 | 6.65% | 15.33% | 162.77% | 60.32% |
| MVP (lmd = 4) | 3.42 | 5.39% | 15.40% | 143.44% | 41.97% |
| MVP (lmd = 16) | 3.82 | 3.85% | 10.72% | 120.08% | 31.41% |
| MVP (lmd = 64) | 4.24 | 3.49% | 9.19% | 124.13% | 29.29% |
# Plot NAV bt_population.set_portfolio_params(compounded=True) fig = bt_population.plot_cumulative_returns() fig.update_layout(title="Wealth", xaxis_title=None, yaxis_title="Net Asset Value (NAV)") fig
# Plot drawdowns bt_population.plot_drawdowns()
MVP with diversification heuristicsΒΆ
We now consider the MVP with two diversification heuristics:
- upper bound: $\w\leq0.25\times\bm{1}$; and
- diversification constraint: $\|\w\|_2^2 \leq 2/N$.
# # Define portfolios # def GMVP(X_lin: pd.DataFrame) -> np.ndarray: # Estimate parameters X_log = np.log(1 + X_lin) Sigma = X_log.cov().values # Optimize portfolio N = len(Sigma) w = cp.Variable(N) prob = cp.Problem(cp.Minimize(cp.quad_form(w, Sigma)), [cp.sum(w) == 1, w >= 0]) prob.solve() return w.value def MVP_vanilla(X_lin: pd.DataFrame) -> np.ndarray: # Estimate parameters X_log = np.log(1 + X_lin) mu = X_log.mean().values Sigma = X_log.cov().values # Optimize portfolio lmd = 1 N = len(Sigma) w = cp.Variable(N) prob = cp.Problem(cp.Maximize(mu.T @ w - (lmd/2) * cp.quad_form(w, Sigma)), [cp.sum(w) == 1, w >= 0]) prob.solve() return w.value def MVP_ub(X_lin: pd.DataFrame) -> np.ndarray: # Estimate parameters X_log = np.log(1 + X_lin) mu = X_log.mean().values Sigma = X_log.cov().values # Optimize portfolio N = len(Sigma) lmd = 1 ub = 0.25 w = cp.Variable(N) prob = cp.Problem(cp.Maximize(mu.T @ w - (lmd/2) * cp.quad_form(w, Sigma)), [cp.sum(w) == 1, w >= 0, w <= ub]) prob.solve() return w.value def MVP_diversification(X_lin: pd.DataFrame) -> np.ndarray: # Estimate parameters X_log = np.log(1 + X_lin) mu = X_log.mean().values Sigma = X_log.cov().values # Optimize portfolio N = len(Sigma) lmd = 1 D = 2/N w = cp.Variable(N) prob = cp.Problem(cp.Maximize(mu.T @ w - (lmd/2) * cp.quad_form(w, Sigma)), [cp.sum(w) == 1, w >= 0, cp.sum_squares(w) <= D]) prob.solve() return w.value
# Wrap each portfolio within skfolio portf_EWP = Portfolio_via_CVXPY( cvxpy_fun=EWP, portfolio_params=dict(name="1/N") ) portf_GMVP = Portfolio_via_CVXPY( cvxpy_fun=GMVP, portfolio_params=dict(name="GMVP") ) portf_MVP_vanilla = Portfolio_via_CVXPY( cvxpy_fun=MVP_vanilla, portfolio_params=dict(name="MVP") ) portf_MVP_ub = Portfolio_via_CVXPY( cvxpy_fun=MVP_ub, portfolio_params=dict(name="MVP with upper bound") ) portf_MVP_diversification = Portfolio_via_CVXPY( cvxpy_fun=MVP_diversification, portfolio_params=dict(name="MVP with diversific. const.") ) all_formulations = [ portf_EWP, portf_GMVP, portf_MVP_vanilla, portf_MVP_ub, portf_MVP_diversification, ] n_portfolios = len(all_formulations) # WalkForward backtest bt_list = batch_cross_val_predict( all_formulations, X=prices_to_returns(stock_prices), #or: stock_prices.pct_change(fill_method=None) cv=WalkForward(test_size=30, train_size=T_trn, purged_size=1), ) bt_population = Population(bt_list)
π Starting batch backtesting with 5 portfolio designs... π Cross-validation: 4 splits ====================================================================== [1/5] π Backtesting '1/N'... β Completed '1/N' in 0:00:00 [2/5] π Backtesting 'GMVP'... β Completed 'GMVP' in 0:00:00 [3/5] π Backtesting 'MVP'... β Completed 'MVP' in 0:00:00 [4/5] π Backtesting 'MVP with upper bound'... β Completed 'MVP with upper bound' in 0:00:00 [5/5] π Backtesting 'MVP with diversific. const.'... β Completed 'MVP with diversific. const.' in 0:00:00 ====================================================================== π Batch backtesting completed! β±οΈ Total time: 0:00:00 π Successfully processed 5 portfolios
# Extract initial portfolio composition of each portfolio model initial_portfolios = Population([ setattr(mpp[0], 'name', mpp.name) or mpp[0] for mpp in bt_population ]) # Plot side-by-side barplot plot_composition_sidebyside(initial_portfolios, asset_names=stock_prices.columns)
# Summary bt_population.summary().loc[[ 'Annualized Sharpe Ratio', 'CVaR at 95%', 'MAX Drawdown', 'Annualized Mean', 'Annualized Standard Deviation' ]].T
| Annualized Sharpe Ratio | CVaR at 95% | MAX Drawdown | Annualized Mean | Annualized Standard Deviation | |
|---|---|---|---|---|---|
| 1/N | 3.98 | 4.37% | 9.11% | 142.32% | 35.74% |
| GMVP | 4.32 | 3.46% | 9.10% | 125.75% | 29.10% |
| MVP | 2.70 | 6.65% | 15.33% | 162.77% | 60.32% |
| MVP with upper bound | 3.38 | 4.96% | 13.36% | 129.65% | 38.34% |
| MVP with diversific. const. | 3.31 | 4.91% | 12.91% | 130.03% | 39.26% |
# Plot NAV fig = bt_population.plot_cumulative_returns() fig.update_layout(title="Wealth", xaxis_title=None, yaxis_title="Net Asset Value (NAV)") fig
# Plot drawdowns bt_population.plot_drawdowns()
Maximum Sharpe Ratio Portfolio (MSRP)ΒΆ
Recall the maximum Sharpe ratio portfolio (MSRP) formulation: $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \dfrac{\w^\T\bmu - r_\textm{f}}{\sqrt{\w^\T\bSigma\w}}\\ \textm{subject to} & \begin{array}{l} \bm{1}^\T\w=1, \quad \w\ge\bm{0}.\end{array} \end{array} $$
We will solve it numerically via different methods:
- general-purpose nonlinear solver
- bisection method
- Dinkelbach method
- Schaible transform
First, let's obtain the mean vector $\bmu$ and covariance matrix $\bSigma$ from the market data:
stock_prices = SP500_stocks_2015to2020[ ["AAPL", "AMZN", "AMD", "GM", "GOOGL", "MGM", "MSFT", "QCOM", "TSCO", "UPS"] ].loc["2019":] X_lin = stock_prices.pct_change().dropna() X_log = np.log(1 + X_lin) T, N = X_lin.shape mu = X_log.mean().values Sigma = X_log.cov().values
MSRP via general-purpose nonlinear solverΒΆ
We will solve this problem with the general-purpose nonlinear solver scipy.optimize.minimize in Python:
# Define the nonconvex objective function def fn_SR(w): return (w @ mu) / np.sqrt(w @ Sigma @ w) def neg_fn_SR(w): return -fn_SR(w) # Initial point N = len(mu) w0 = np.ones(N) / N # Equality constraint: sum(w) = 1 def constraint_eq(w): return np.sum(w) - 1 # Optimization res = minimize(neg_fn_SR, w0, method='SLSQP', bounds=[(0, None) for _ in range(N)], # w >= 0 constraints={'type': 'eq', 'fun': constraint_eq}) w_nonlinear_solver = res.x print(res)
message: Optimization terminated successfully
success: True
status: 0
fun: -0.10712133871542401
x: [ 5.622e-01 1.628e-01 1.909e-01 1.203e-17 1.059e-16
0.000e+00 0.000e+00 2.444e-19 7.543e-02 8.767e-03]
nit: 14
jac: [-9.832e-05 2.212e-04 2.336e-04 8.025e-02 4.231e-02
1.223e-01 1.348e-02 1.547e-02 -2.779e-04 -4.957e-04]
nfev: 154
njev: 14
multipliers: [-8.909e-05]
MSRP via bisection methodΒΆ
We are going to solve the nonconvex problem $$ \begin{array}{ll} \underset{\w,t}{\textm{maximize}} & t\\ \textm{subject to} & t \leq \dfrac{\w^\T\bmu}{\sqrt{\w^\T\bSigma\w}}\\ & \bm{1}^\T\w=1, \quad \w\geq\bm{0}. \end{array} $$ via bisection on $t$ with the following (convex) SOCP problem for a given $t$: $$ \begin{array}{ll} \underset{\;}{\textm{find}} & \w\\ \textm{subject to} & t \left\Vert \bSigma^{1/2}\w\right\Vert_{2}\leq\w^\T\bmu\\ & \bm{1}^\T\w=1, \quad \w\geq\bm{0}. \end{array} $$
# Square-root of matrix Sigma Sigma_12 = cholesky(Sigma) print(np.max(np.abs(Sigma_12.T @ Sigma_12 - Sigma))) # sanity check
2.168404344971009e-19
# Create function for MVP (each iteration of bisection) import warnings def SOCP_bisection(t): w = cp.Variable(N) prob = cp.Problem(cp.Maximize(0), constraints=[t * cp.norm(Sigma_12 @ w, 2) <= mu.T @ w, sum(w) == 1, w >= 0]) # Solve the problem (ignore annoying warnings): with warnings.catch_warnings(): warnings.simplefilter("ignore") prob.solve() return {"status": prob.status, "w": w.value}
# Now run the bisection algorithm t_lb = 0 # for sure the problem is feasible in this case t_ub = 10 # a tighter upper bound could be chosen (10 is a conservative choice) while t_ub - t_lb > 1e-3: t = (t_ub + t_lb) / 2 # midpoint print(f"Current interval = ({t_lb:.3f}, {t_ub:.3f}) Trying midpoint t = {t:.4f}") if 'infeasible' in SOCP_bisection(t)["status"]: t_ub = t else: t_lb = t w_bisection = SOCP_bisection(t_lb)["w"]
Current interval = (0.000, 10.000) Trying midpoint t = 5.0000 Current interval = (0.000, 5.000) Trying midpoint t = 2.5000 Current interval = (0.000, 2.500) Trying midpoint t = 1.2500 Current interval = (0.000, 1.250) Trying midpoint t = 0.6250 Current interval = (0.000, 0.625) Trying midpoint t = 0.3125 Current interval = (0.000, 0.312) Trying midpoint t = 0.1562 Current interval = (0.000, 0.156) Trying midpoint t = 0.0781 Current interval = (0.078, 0.156) Trying midpoint t = 0.1172 Current interval = (0.078, 0.117) Trying midpoint t = 0.0977 Current interval = (0.098, 0.117) Trying midpoint t = 0.1074 Current interval = (0.098, 0.107) Trying midpoint t = 0.1025 Current interval = (0.103, 0.107) Trying midpoint t = 0.1050 Current interval = (0.105, 0.107) Trying midpoint t = 0.1062 Current interval = (0.106, 0.107) Trying midpoint t = 0.1068
# Comparison between two solutions w_df = pd.DataFrame({ 'nonlinear_solver': w_nonlinear_solver, 'bisection': w_bisection, }) print(w_df.round(3))
nonlinear_solver bisection 0 0.562 0.537 1 0.163 0.152 2 0.191 0.185 3 0.000 0.000 4 0.000 0.001 5 0.000 0.000 6 0.000 0.003 7 0.000 0.002 8 0.075 0.081 9 0.009 0.039
# Sharpe ratio of two solutions sharpe_ratios_df = pd.DataFrame({ 'Method': ['nonlinear_solver', 'bisection'], 'Sharpe Ratio': [fn_SR(w_nonlinear_solver), fn_SR(w_bisection)] }) print(sharpe_ratios_df.round(3))
Method Sharpe Ratio 0 nonlinear_solver 0.107 1 bisection 0.107
MSRP via Dinkelbach methodΒΆ
We are going to solve the nonconvex problem $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \dfrac{\w^\T\bmu}{\sqrt{\w^\T\bSigma\w}}\\ \textm{subject to} & \begin{array}{l} \bm{1}^\T\w=1, \quad \w\ge\bm{0}.\end{array} \end{array} $$ by iteratively solving the (convex) SOCP problem for a given $y^{(k)}$: $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \w^\T\bmu - y^{(k)} \left\Vert \bSigma^{1/2}\w\right\Vert_{2}\\ \textm{subject to} & \begin{array}{l} \bm{1}^\T\w=1, \quad \w\ge\bm{0}.\end{array} \end{array} $$ where $$y^{(k)} = \frac{\mathbf{w}^{(k)\T}\bmu}{\sqrt{\w^{(k)\T}\bSigma\w^{(k)}}}.$$
# Create function for MVP (each iteration of Dinkelbach) def SOCP_Dinkelbach(y): w = cp.Variable(Sigma.shape[0]) prob = cp.Problem(cp.Maximize(mu.T @ w - y * cp.norm(Sigma_12 @ w, 2)), constraints=[cp.sum(w) == 1, w >= 0]) prob.solve() return w.value # Initial point (has to satisfy w_k.T @ mu >= 0) i_max = np.argmax(mu) w_k = np.zeros(N) w_k[i_max] = 1 # Now the iterative Dinkelbach algorithm k = 1 while k == 1 or np.max(np.abs(w_k - w_prev)) > 1e-6: w_prev = w_k y_k = (w_k.T @ mu) / np.sqrt(w_k.T @ Sigma @ w_k) w_k = SOCP_Dinkelbach(y_k) k += 1 w_Dinkelbach = w_k print(f"Number of iterations: {k-1}")
Number of iterations: 5
# Comparison among three solutions w_df = pd.DataFrame({ 'nonlinear_solver': w_nonlinear_solver, 'bisection': w_bisection, 'Dinkelbach': w_Dinkelbach }) print(w_df.round(3))
nonlinear_solver bisection Dinkelbach 0 0.562 0.537 0.561 1 0.163 0.152 0.156 2 0.191 0.185 0.188 3 0.000 0.000 0.000 4 0.000 0.001 0.000 5 0.000 0.000 0.000 6 0.000 0.003 0.000 7 0.000 0.002 0.000 8 0.075 0.081 0.079 9 0.009 0.039 0.015
# Sharpe ratio of three solutions sharpe_ratios_df = pd.DataFrame({ 'Method': ['nonlinear_solver', 'bisection', 'Dinkelbach'], 'Sharpe Ratio': [fn_SR(w_nonlinear_solver), fn_SR(w_bisection), fn_SR(w_Dinkelbach)] }) print(sharpe_ratios_df.round(3))
Method Sharpe Ratio 0 nonlinear_solver 0.107 1 bisection 0.107 2 Dinkelbach 0.107
MSRP via Schaible transformΒΆ
The maximum Sharpe ratio portfolio (MSRP) is the nonconvex problem $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \dfrac{\w^\T\bmu}{\sqrt{\w^\T\bSigma\w}}\\ \textm{subject to} & \begin{array}{l} \bm{1}^\T\w=1, \quad \w\ge\bm{0}.\end{array} \end{array} $$ that can be rewritten in convex form as $$ \begin{array}{ll} \underset{\w}{\textm{minimize}} & \tilde{\w}^\T\bSigma\tilde{\w}\\ \textm{subject to} & \tilde{\w}^\T\bmu = 1\\ & \tilde{\w}\ge\bm{0} \end{array} $$ and then $\w = \tilde{\w}/(\bm{1}^\T\tilde{\w})$.
This is a quadratic problem (QP) and we can conveniently use CVXR (although one is advised to use a specific QP solver like quadprog for speed and stability):
# create function for MSRP in convex form def MSRP_convex(mu, Sigma): w_ = cp.Variable(Sigma.shape[0]) prob = cp.Problem(cp.Minimize(cp.quad_form(w_, Sigma)), constraints=[w_ >= 0, mu.T @ w_ == 1]) prob.solve() w = w_.value / np.sum(w_.value) return w # This function can now be used as w_MSRP = MSRP_convex(mu, Sigma)
# Comparison among solutions comparison = pd.DataFrame({ 'w_nonlinear_solver': w_nonlinear_solver, 'w_bisection': w_bisection, 'w_Dinkelbach': w_Dinkelbach, 'w_MSRP': w_MSRP }) print(comparison.round(3))
w_nonlinear_solver w_bisection w_Dinkelbach w_MSRP 0 0.562 0.537 0.561 0.561 1 0.163 0.152 0.156 0.156 2 0.191 0.185 0.188 0.188 3 0.000 0.000 0.000 -0.000 4 0.000 0.001 0.000 0.000 5 0.000 0.000 0.000 0.000 6 0.000 0.003 0.000 -0.000 7 0.000 0.002 0.000 -0.000 8 0.075 0.081 0.079 0.079 9 0.009 0.039 0.015 0.015
# Sharpe ratio of different solutions sharpe_ratios_df = pd.DataFrame({ 'Method': ['nonlinear_solver', 'bisection', 'Dinkelbach', 'Schaible'], 'Sharpe Ratio': [fn_SR(w_nonlinear_solver), fn_SR(w_bisection), fn_SR(w_Dinkelbach), fn_SR(w_MSRP)] }) print(sharpe_ratios_df.round(3))
Method Sharpe Ratio 0 nonlinear_solver 0.107 1 bisection 0.107 2 Dinkelbach 0.107 3 Schaible 0.107
Conclusion: All methods produce the optimal solution.
Numerical experimentsΒΆ
We now compare the following portfolios:
global minimum variance portfolio (GMVP): $$ \begin{array}{ll} \underset{\w}{\textm{minimize}} & \w^\T\bSigma\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}; \end{array} $$
mean-variance portfolio (MVP): $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \w^\T\bmu - \frac{\lambda}{2}\w^\T\bSigma\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}; \end{array} $$
maximum Sharpe ratio portfolio (MSRP): $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \dfrac{\w^\T\bmu - r_\textm{f}}{\sqrt{\w^\T\bSigma\w}}\\ \textm{subject to} & \begin{array}{l} \bm{1}^\T\w=1, \quad \w\ge\bm{0}.\end{array} \end{array} $$
# Get data stock_prices = SP500_stocks_2015to2020[ ["AAPL", "AMZN", "AMD", "GM", "GOOGL", "MGM", "MSFT", "QCOM", "TSCO", "UPS"] ].loc["2019":] T = stock_prices.shape[0] T_trn = round(0.70*T)
# # Define portfolios # def GMVP(X_lin: pd.DataFrame) -> np.ndarray: # Estimate parameters X_log = np.log(1 + X_lin) Sigma = X_log.cov().values # Optimize portfolio N = len(Sigma) w = cp.Variable(N) prob = cp.Problem(cp.Minimize(cp.quad_form(w, Sigma)), [cp.sum(w) == 1, w >= 0]) prob.solve() return w.value def MVP(X_lin: pd.DataFrame) -> np.ndarray: # Estimate parameters X_log = np.log(1 + X_lin) mu = X_log.mean().values Sigma = X_log.cov().values # Optimize portfolio lmd = 1 N = len(Sigma) w = cp.Variable(N) prob = cp.Problem(cp.Maximize(mu.T @ w - (lmd/2) * cp.quad_form(w, Sigma)), [cp.sum(w) == 1, w >= 0]) prob.solve() return w.value def MSRP(X_lin: pd.DataFrame) -> np.ndarray: # Estimate parameters X_log = np.log(1 + X_lin) mu = X_log.mean().values Sigma = X_log.cov().values # Optimize portfolio w = MSRP_convex(mu, Sigma) return w
from skfolio.optimization import MeanRisk, ObjectiveFunction from skfolio import RiskMeasure # Wrap each portfolio within skfolio portf_GMVP = Portfolio_via_CVXPY( cvxpy_fun=GMVP, portfolio_params=dict(name="GMVP") ) portf_GMVP_skfolio = MeanRisk( objective_function=ObjectiveFunction.MINIMIZE_RISK, risk_measure=RiskMeasure.VARIANCE, portfolio_params=dict(name="GMVP (skfolio)") ) portf_MVP = Portfolio_via_CVXPY( cvxpy_fun=MVP, portfolio_params=dict(name="MVP") ) portf_MVP_skfolio = MeanRisk( objective_function=ObjectiveFunction.MAXIMIZE_UTILITY, # uses mean - λ·var risk_measure=RiskMeasure.VARIANCE, risk_aversion=1/2, portfolio_params=dict(name="MVP (skfolio)") ) portf_MSRP = Portfolio_via_CVXPY( cvxpy_fun=MSRP, portfolio_params=dict(name="MSRP") ) portf_MSRP_skfolio = MeanRisk( objective_function=ObjectiveFunction.MAXIMIZE_RATIO, # uses mean / var^2 risk_measure=RiskMeasure.VARIANCE, portfolio_params=dict(name="MSRP (skfolio)") ) portf_MSRP_bis_skfolio = MeanRisk( objective_function=ObjectiveFunction.MAXIMIZE_RATIO, # uses mean / var risk_measure=RiskMeasure.STANDARD_DEVIATION, portfolio_params=dict(name="MSRP bis (skfolio)") ) all_formulations = [ portf_GMVP, portf_GMVP_skfolio, portf_MVP, portf_MVP_skfolio, portf_MSRP, portf_MSRP_skfolio, portf_MSRP_bis_skfolio, ] n_portfolios = len(all_formulations) # WalkForward backtest bt_list = batch_cross_val_predict( all_formulations, X=prices_to_returns(stock_prices), #or: stock_prices.pct_change(fill_method=None) cv=WalkForward(test_size=30, train_size=T_trn, purged_size=1), ) bt_population = Population(bt_list)
π Starting batch backtesting with 7 portfolio designs... π Cross-validation: 4 splits ====================================================================== [1/7] π Backtesting 'GMVP'... β Completed 'GMVP' in 0:00:00 [2/7] π Backtesting 'GMVP (skfolio)'... β Completed 'GMVP (skfolio)' in 0:00:00 [3/7] π Backtesting 'MVP'... β Completed 'MVP' in 0:00:00 [4/7] π Backtesting 'MVP (skfolio)'... β Completed 'MVP (skfolio)' in 0:00:00 [5/7] π Backtesting 'MSRP'... β Completed 'MSRP' in 0:00:00 [6/7] π Backtesting 'MSRP (skfolio)'... β Completed 'MSRP (skfolio)' in 0:00:00 [7/7] π Backtesting 'MSRP bis (skfolio)'... β Completed 'MSRP bis (skfolio)' in 0:00:00 ====================================================================== π Batch backtesting completed! β±οΈ Total time: 0:00:00 π Successfully processed 7 portfolios
# Extract initial portfolio composition of each portfolio model initial_portfolios = Population([ setattr(mpp[0], 'name', mpp.name) or mpp[0] for mpp in bt_population ]) # Plot side-by-side barplot plot_composition_sidebyside(initial_portfolios, asset_names=stock_prices.columns)
# Summary bt_population.summary().loc[[ 'Annualized Sharpe Ratio', 'CVaR at 95%', 'MAX Drawdown', 'Annualized Mean', 'Annualized Standard Deviation' ]].T
| Annualized Sharpe Ratio | CVaR at 95% | MAX Drawdown | Annualized Mean | Annualized Standard Deviation | |
|---|---|---|---|---|---|
| GMVP | 4.32 | 3.46% | 9.10% | 125.75% | 29.10% |
| GMVP (skfolio) | 4.34 | 3.43% | 9.08% | 126.14% | 29.04% |
| MVP | 2.70 | 6.65% | 15.33% | 162.77% | 60.32% |
| MVP (skfolio) | 2.68 | 6.65% | 15.33% | 171.61% | 63.95% |
| MSRP | 3.40 | 5.26% | 14.89% | 142.09% | 41.76% |
| MSRP (skfolio) | 3.39 | 5.38% | 15.11% | 146.71% | 43.28% |
| MSRP bis (skfolio) | 3.39 | 5.38% | 15.11% | 146.71% | 43.28% |
# Plot NAV fig = bt_population.plot_cumulative_returns() fig.update_layout(title="Wealth", xaxis_title=None, yaxis_title="Net Asset Value (NAV)") fig
# Plot drawdowns bt_population.plot_drawdowns()
Universal Algorithm for Portfolio OptimizationΒΆ
MSRPΒΆ
We consider two methods for the resolution of the MSRP:
Via the Schaible transform, i.e., solving $$ \begin{array}{ll} \underset{\bm{y}}{\textm{minimize}} & \bm{y}^\T\bSigma\bm{y}\\ \textm{subject to} & \bm{y}^\T\left(\bmu - r_\textm{f}\bm{1}\right) = 1\\ & \bm{y}\ge\bm{0}, \end{array} $$ with $\w = \bm{y}/\left(\bm{1}^\T\bm{y}\right)$.
Via the universal iterative SQP-MVP algorithm, i.e., we iteratively obtain $\w^{k+1}$, for $k=0,1,\dots,$, by solving $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \w^\T\bmu - \dfrac{\lambda^k}{2}\w^\T\bSigma\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}, \end{array} $$ with $$ \lambda^k = \dfrac{(\w^k)^\T\bmu - r_\textm{f}}{(\w^k)^\T\bSigma\w^k}. $$
# Prepare data stock_prices = SP500_stocks_2015to2020[ ["AAPL", "AMZN", "AMD", "GM", "GOOGL", "MGM", "MSFT", "QCOM", "TSCO", "UPS"] ].loc["2019":] X_lin = stock_prices.pct_change().dropna() X_log = np.log(1 + X_lin) T, N = X_lin.shape mu = X_log.mean().values Sigma = X_log.cov().values
# Using Schaible transform + CVX to get the optimal value w_ = cp.Variable(N) prob = cp.Problem(cp.Minimize(cp.quad_form(w_, Sigma)), constraints=[w_ >= 0, mu.T @ w_ == 1]) prob.solve() w_cvx = w_.value / np.sum(w_.value) obj_cvx = (w_cvx.T @ mu) / np.sqrt(w_cvx.T @ Sigma @ w_cvx)
# Using the SQP-MVP algorithm w = np.ones(N) / N obj_sqp = [(w.T @ mu) / np.sqrt(w.T @ Sigma @ w)] k = 0 for k in range(21): lmd_k = (w.T @ mu) / (w.T @ Sigma @ w) w_prev = w w = cp.Variable(N) prob = cp.Problem(cp.Maximize(mu.T @ w - (lmd_k/2) * cp.quad_form(w, Sigma)), constraints=[cp.sum(w) == 1, w >= 0]) prob.solve() w = w.value obj_sqp.append((w.T @ mu) / np.sqrt(w.T @ Sigma @ w)) if np.max(np.abs(w - w_prev)) < 1e-4: break
# Plot df = pd.DataFrame({ "k": range(len(obj_sqp)), "SQP iterative method": obj_sqp }) plt.figure(figsize=(10, 6)) plt.axhline(y=obj_cvx, color='r', linestyle='-', linewidth=1.5) plt.plot(df["k"], df["SQP iterative method"], color='b', linewidth=1.5, marker='o', markersize=2.5) plt.title("Convergence") plt.ylabel("objective value") plt.xlabel("iteration") plt.legend(["optimal value", "SQP iterative method"]) plt.grid(True) plt.show()
Mean-volatility portfolioΒΆ
We now consider two methods for the resolution of the mean-volatility portfolio:
Via an SOCP solver, i.e., solving $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \w^\T\bmu - \kappa\sqrt{\w^\T\bSigma\w}\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}, \end{array} $$
Via the universal iterative SQP-MVP algorithm, i.e., we iteratively obtain $\w^{k+1}$, for $k=0,1,\dots,$, by solving $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \w^\T\bmu - \dfrac{\lambda^k}{2}\w^\T\bSigma\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}, \end{array} $$ with $$ \lambda^k = \kappa/\sqrt{(\w^k)^\T\bSigma\w^k}. $$
kappa = 1.0 # Using CVX to get the optimal value Sigma_12 = cholesky(Sigma) w = cp.Variable(N) prob = cp.Problem(cp.Maximize(mu.T @ w - kappa * cp.norm(Sigma_12 @ w, 2)), constraints=[cp.sum(w) == 1, w >= 0]) result = prob.solve() w_cvx = w.value obj_cvx = mu.T @ w_cvx - kappa * np.sqrt(w_cvx.T @ Sigma @ w_cvx)
# Using the SQP-MVP algorithm w = np.ones(N) / N obj_sqp = [w.T @ mu - np.sqrt(w.T @ Sigma @ w)] for k in range(21): lmd_k = kappa / np.sqrt(w.T @ Sigma @ w) w_prev = w w = cp.Variable(N) prob = cp.Problem(cp.Maximize(mu.T @ w - (lmd_k/2) * cp.quad_form(w, Sigma)), constraints=[cp.sum(w) == 1, w >= 0]) prob.solve() w = w.value obj_sqp.append(w.T @ mu - kappa * np.sqrt(w.T @ Sigma @ w)) if np.max(np.abs(w - w_prev)) < 1e-4: break
# Plot df = pd.DataFrame({ "k": range(len(obj_sqp)), "SQP iterative method": obj_sqp }) plt.figure(figsize=(10, 6)) plt.axhline(y=obj_cvx, color='r', linestyle='-', linewidth=1.5) plt.plot(df["k"], df["SQP iterative method"], color='b', linewidth=1.5, marker='o', markersize=2.5) plt.title("Convergence") plt.ylabel("objective value") plt.xlabel("iteration") plt.legend(["optimal value", "SQP iterative method"]) plt.grid(True) plt.show()