蒙特卡洛模拟-NUTS-with-the-support-from-PYMC
目录
蒙特卡洛模拟 NUTS with the support from PYMC
#蒙特卡洛模拟 NUTS with the support from PYMC
import tushare as ts
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymc as pm
import arviz as az
from datetime import datetime, timedelta
import multiprocessing as mp
from dataIntegrator import CommonParameters
###设置matplotlib支持中文字体
plt.rcParams[‘font.sans-serif’] = [‘SimHei’, ‘Microsoft YaHei’, ‘DejaVu Sans’, ‘sans-serif’]
plt.rcParams[‘axes.unicode_minus’] = False
###解决警告显示问题
import warnings
warnings.filterwarnings(‘ignore’)
def main():
设置 Tushare token
token = CommonParameters.tuShareToken
ts.set_token(token)
pro = ts.pro_api()
# 选择股票代码(例如:贵州茅台 - 600519.SH)
ticker = '600519.SH'
start_date = '20230101'
end_date = '20250822'
### 获取股票日线数据
def get_stock_data(ts_code, start_date, end_date):
df = pro.daily(ts_code=ts_code, start_date=start_date, end_date=end_date)
df = df.sort_values('trade_date')
df['trade_date'] = pd.to_datetime(df['trade_date'])
df.set_index('trade_date', inplace=True)
return df
### 下载数据
data = get_stock_data(ticker, start_date, end_date)
### 计算日收益率
data['Return'] = np.log(data['close'] / data['close'].shift(1))
returns = data['Return'].dropna()
print(f"数据时间范围: {data.index.min()} 到 {data.index.max()}")
print(f"收益率数据数量: {len(returns)}")
### 使用 PyMC 构建贝叶斯模型
with pm.Model() as model:
# 先验分布
mu = pm.Normal('mu', mu=0, sigma=0.1)
sigma = pm.HalfNormal('sigma', sigma=0.1)
### 似然函数
returns_obs = pm.Normal('returns_obs', mu=mu, sigma=sigma, observed=returns.values)
### 风险指标 - 必须在采样前定义
sharpe_ratio = pm.Deterministic('sharpe_ratio', mu / sigma)
annualized_return = pm.Deterministic('annualized_return', mu * 252)
annualized_volatility = pm.Deterministic('annualized_volatility', sigma * np.sqrt(252))
### 下跌风险
var_95 = pm.Deterministic('var_95', mu - 1.645 * sigma)
expected_shortfall = pm.Deterministic('expected_shortfall', mu - 2.063 * sigma)
print(rf"var_95: {var_95}")
print(rf"expected_shortfall: {expected_shortfall}")
### 运行MCMC采样器 - 使用单进程避免多进程问题
### 4条链 × 2000次采样 = 8000个总样本, 确保有效样本量(ESS)足够大(通常需要>1000)
### tune=1000- 预热次数, 让马尔可夫链从初始值收敛到目标分布
### 控制NUTS算法中提议点的接受概率目标值, 0.9 = 算法试图让90%的提议点被接受
print("starting sampling")
trace = pm.sample(2000, tune=1000, target_accept=0.9, random_seed=42, cores=4)
print("sampling completed")
### 采样完成后访问变量
print("风险指标后验统计:")
print(f"夏普比率: {trace.posterior['sharpe_ratio'].mean().item():.4f}")
print(f"年化收益率: {trace.posterior['annualized_return'].mean().item():.4f}")
print(f"年化波动率: {trace.posterior['annualized_volatility'].mean().item():.4f}")
print(f"VaR(95%): {trace.posterior['var_95'].mean().item():.4f}")
print(f"Expected Shortfall: {trace.posterior['expected_shortfall'].mean().item():.4f}")
### 查看参数的后验分布摘要
print("参数后验分布摘要:")
print(az.summary(trace))
### 提取统计量
mu_mean = trace.posterior['mu'].mean().item()
sigma_mean = trace.posterior['sigma'].mean().item()
print(f"平均收益率: {mu_mean:.6f} (日), {mu_mean * 252:.4f} (年化)")
print(f"波动率: {sigma_mean:.6f} (日), {sigma_mean * np.sqrt(252):.4f} (年化)")
### 后验预测检查
with model:
ppc = pm.sample_posterior_predictive(trace, random_seed=42)
### 收敛诊断 - 使用更安全的方式
print("\n收敛诊断:")
try:
# R-hat值远大于1.1,这是一个严重的警告信号
# 理想情况下,R-hat应该接近1.0(通常 < 1.1表示收敛)
# 1.607和2.971表明链没有收敛,采样结果不可靠
# 这意味着从不同起始点运行的链给出了显著不同的结果
print("R-hat统计量:")
print(az.rhat(trace.posterior['mu']).values)
print(az.rhat(trace.posterior['sigma']).values)
print("\n有效样本量:")
print(az.ess(trace.posterior['mu']).values)
print(az.ess(trace.posterior['sigma']).values)
### 1. R - hat统计量(Gelman - Rubin诊断):衡量不同链之间的一致性,值接近1表示收敛良好
### 2. 有效样本量(ESS):考虑自相关性后的实际独立样本数量,值越大越好
### 3. 蒙特卡洛标准误(MCSE):由于蒙特卡洛采样带来的估计误差,值越小表示估计越精确
# 应该这样打印完整的R-hat值
print("完整的R-hat统计量:")
print("-"*100)
print(az.rhat(trace)) # 这会显示所有参数的R-hat
print("\n完整的有效样本量:")
print("-" * 100)
print(az.ess(trace)) # 这会显示所有参数的ESS
print("\n完整的蒙特卡洛标准误:")
print("-" * 100)
print(f"MCSE: {az.mcse(trace)}")
except Exception as e:
print(f"收敛诊断失败: {str(e)}")
### 参数相关性
mu_samples = trace.posterior['mu'].values.flatten()
sigma_samples = trace.posterior['sigma'].values.flatten()
correlation = np.corrcoef(mu_samples, sigma_samples)[0, 1]
print(f"\nmu和sigma的相关性: {correlation:.4f}")
### 绘制诊断图
az.plot_trace(trace)
plt.suptitle(f'{ticker} MCMC参数后验分布', fontsize=16)
plt.tight_layout()
plt.show()
### 从后验分布中抽取参数样本
n_samples = 100
mu_samples = trace.posterior['mu'].values.flatten()[-n_samples:]
sigma_samples = trace.posterior['sigma'].values.flatten()[-n_samples:]
###设置预测参数
S0 = data['close'].iloc[-1]
#T = 252 # days
T = 5
n_sims = 50
### 蒙特卡洛模拟
predicted_prices = np.zeros((T, n_samples * n_sims))
final_prices = predicted_prices[:, -1]
for i in range(n_samples):
mu_val = mu_samples[i]
sigma_val = sigma_samples[i]
for j in range(n_sims):
daily_returns = np.random.normal(mu_val, sigma_val, T)
cumulative_returns = np.exp(np.cumsum(daily_returns))
path = S0 * cumulative_returns
predicted_prices[:, i * n_sims + j] = path
print("\n最终价格预测统计:")
print(f"中位数: {np.median(final_prices):.2f}")
print(f"均值: {np.mean(final_prices):.2f}")
print(f"95%置信区间: [{np.percentile(final_prices, 2.5):.2f}, {np.percentile(final_prices, 97.5):.2f}]")
### 计算统计量
median_path = np.median(predicted_prices, axis=1)
upper_95 = np.percentile(predicted_prices, 97.5, axis=1)
lower_95 = np.percentile(predicted_prices, 2.5, axis=1)
### 可视化结果
plt.figure(figsize=(14, 8))
plt.plot(predicted_prices, color='gray', alpha=0.05)
plt.plot(median_path, label='Median Path', color='red', linewidth=2)
plt.plot(upper_95, label='95% CI', color='blue', linestyle='--', linewidth=2)
plt.plot(lower_95, color='blue', linestyle='--', linewidth=2)
plt.title(f'{ticker} Monte Carlo Simulation')
plt.xlabel('Trading Days')
plt.ylabel('Price')
plt.legend()
plt.show()
if name == ‘main’:
在 Windows 上需要添加这行代码
mp.freeze_support()
main()