# 量化分析师的Python日记【第4天：一大波金融Library来袭之scipy篇】

## 一、SciPy概述

``````import numpy as np
import scipy.stats as stats
import scipy.optimize as opt
``````

## 二、统计部分

### 2.1 生成随机数

``````rv_unif = stats.uniform.rvs(size=10)
print rv_unif
rv_beta = stats.beta.rvs(size=10, a=4, b=2)
print rv_beta

[ 0.6419336   0.48403001  0.89548809  0.73837498  0.65744886  0.41845577
0.3823512   0.0985301   0.66785949  0.73163835]
[ 0.82164685  0.69563836  0.74207073  0.94348192  0.82979411  0.87013796
0.78412952  0.47508183  0.29296073  0.52551156]
``````

``````np.random.seed(seed=2015)
rv_beta = stats.beta.rvs(size=10, a=4, b=2)
print "method 1:"
print rv_beta

np.random.seed(seed=2015)
beta = stats.beta(a=4, b=2)
print "method 2:"
print beta.rvs(size=10)

method 1:
[ 0.43857338  0.9411551   0.75116671  0.92002864  0.62030521  0.56585548
0.41843548  0.5953096   0.88983036  0.94675351]
method 2:
[ 0.43857338  0.9411551   0.75116671  0.92002864  0.62030521  0.56585548
0.41843548  0.5953096   0.88983036  0.94675351]
``````

### 2.2 假设检验

``````norm_dist = stats.norm(loc=0.5, scale=2)
n = 200
dat = norm_dist.rvs(size=n)
print "mean of data is: " + str(np.mean(dat))
print "median of data is: " + str(np.median(dat))
print "standard deviation of data is: " + str(np.std(dat))

mean of data is: 0.383309149888
median of data is: 0.394980561217
standard deviation of data is: 2.00589851641
``````

``````mu = np.mean(dat)
sigma = np.std(dat)
stat_val, p_val = stats.kstest(dat, 'norm', (mu, sigma))
print 'KS-statistic D = %6.3f p-value = %6.4f' % (stat_val, p_val)

KS-statistic D =  0.037 p-value = 0.9428
``````

``````stat_val, p_val = stats.ttest_1samp(dat, 0)
print 'One-sample t-statistic D = %6.3f, p-value = %6.4f' % (stat_val, p_val)

One-sample t-statistic D =  2.696, p-value = 0.0076
``````

``````norm_dist2 = stats.norm(loc=-0.2, scale=1.2)
dat2 = norm_dist2.rvs(size=n/2)
stat_val, p_val = stats.ttest_ind(dat, dat2, equal_var=False)
print 'Two-sample t-statistic D = %6.3f, p-value = %6.4f' % (stat_val, p_val)

Two-sample t-statistic D =  3.572, p-value = 0.0004
``````

`stats`还提供其他大量的假设检验函数，如`bartlett`和`levene`用于检验方差是否相等；`anderson_ksamp`用于进行Anderson-Darling的K-样本检验等。

### 2.3 其他函数

``````g_dist = stats.gamma(a=2)
print "quantiles of 2, 4 and 5:"
print g_dist.cdf([2, 4, 5])
print "Values of 25%, 50% and 95%:"
print g_dist.pdf([0.25, 0.5, 0.95])

quantiles of 2, 4 and 5:
[ 0.59399415  0.90842181  0.95957232]
Values of 25%, 50% and 95%:
[ 0.1947002   0.30326533  0.36740397]
``````

``````stats.norm.moment(6, loc=0, scale=1)

15.000000000895332
``````

`describe`函数提供对数据集的统计描述分析，包括数据样本大小，极值，均值，方差，偏度和峰度：

``````norm_dist = stats.norm(loc=0, scale=1.8)
dat = norm_dist.rvs(size=100)
info = stats.describe(dat)
print "Data size is: " + str(info[0])
print "Minimum value is: " + str(info[1][0])
print "Maximum value is: " + str(info[1][1])
print "Arithmetic mean is: " + str(info[2])
print "Unbiased variance is: " + str(info[3])
print "Biased skewness is: " + str(info[4])
print "Biased kurtosis is: " + str(info[5])

Data size is: 100
Minimum value is: -5.73556523159
Maximum value is: 3.77439818033
Arithmetic mean is: -0.00559348382755
Unbiased variance is: 3.64113204268
Biased skewness is: -0.600615731841
Biased kurtosis is: 0.432147856587
``````

``````norm_dist = stats.norm(loc=0, scale=1.8)
dat = norm_dist.rvs(size=100)
mu, sigma = stats.norm.fit(dat)
print "MLE of data mean:" + str(mu)
print "MLE of data standard deviation:" + str(sigma)

MLE of data mean:0.00712958665203
MLE of data standard deviation:1.71228079199
``````

`pearsonr`和`spearmanr`可以计算`Pearson`和`Spearman`相关系数，这两个相关系数度量了两组数据的相互线性关联程度：

``````norm_dist = stats.norm()
dat1 = norm_dist.rvs(size=100)
exp_dist = stats.expon()
dat2 = exp_dist.rvs(size=100)
cor, pval = stats.pearsonr(dat1, dat2)
print "Pearson correlation coefficient: " + str(cor)
cor, pval = stats.spearmanr(dat1, dat2)
print "Spearman's rank correlation coefficient: " + str(cor)

Pearson correlation coefficient: -0.0345336831321
Spearman's rank correlation coefficient: -0.0345336831321
``````

``````x = stats.chi2.rvs(3, size=50)
y = 2.5 + 1.2 * x + stats.norm.rvs(size=50, loc=0, scale=1.5)
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
print "Slope of fitted model is:" , slope
print "Intercept of fitted model is:", intercept
print "R-squared:", r_value**2

Slope of fitted model is: 1.20010505908
Intercept of fitted model is: 2.04778311819
R-squared: 0.781316678233
``````

## 三、优化部分

### 3.1 无约束优化问题

``````def rosen(x):
"""The Rosenbrock function"""
return sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0)
``````

``````x_0 = np.array([0.5, 1.6, 1.1, 0.8, 1.2])
res = opt.minimize(rosen, x_0, method='nelder-mead', options={'xtol': 1e-8, 'disp': True})
print "Result of minimizing Rosenbrock function via Nelder-Mead Simplex algorithm:"
print res

Optimization terminated successfully.
Current function value: 0.000000
Iterations: 436
Function evaluations: 706
Result of minimizing Rosenbrock function via Nelder-Mead Simplex algorithm:
status: 0
nfev: 706
success: True
fun: 1.6614969876635003e-17
x: array([ 1.,  1.,  1.,  1.,  1.])
message: 'Optimization terminated successfully.'
nit: 436
``````

Rosenbrock函数的性质比较好，简单的优化方法就可以处理了，还可以在`minimize`中使用`method='powell'`来指定使用Powell's method。这两种简单的方法并不使用函数的梯度，在略微复杂的情形下收敛速度比较慢，下面让我们来看一下用到函数梯度进行寻优的方法。

#### 3.1.2 Broyden-Fletcher-Goldfarb-Shanno法

Broyden-Fletcher-Goldfarb-Shanno（BFGS）法用到了梯度信息，首先求一下Rosenbrock函数的梯度：

``````def rosen_der(x):
xm = x[1:-1]
xm_m1 = x[:-2]
xm_p1 = x[2:]
der = np.zeros_like(x)
der[1:-1] = 200*(xm-xm_m1**2) - 400*(xm_p1 - xm**2)*xm - 2*(1-xm)
der[0] = -400*x[0]*(x[1]-x[0]**2) - 2*(1-x[0])
der[-1] = 200*(x[-1]-x[-2]**2)
return der
``````

``````res = opt.minimize(rosen, x_0, method='BFGS', jac=rosen_der, options={'disp': True})
print "Result of minimizing Rosenbrock function via Broyden-Fletcher-Goldfarb-Shanno algorithm:"
print res

Optimization terminated successfully.
Current function value: 0.000000
Iterations: 52
Function evaluations: 63
Result of minimizing Rosenbrock function via Broyden-Fletcher-Goldfarb-Shanno algorithm:
status: 0
success: True
njev: 63
nfev: 63
hess_inv: array([[ 0.00726515,  0.01195827,  0.0225785 ,  0.04460906,  0.08923649],
[ 0.01195827,  0.02417936,  0.04591135,  0.09086889,  0.18165604],
[ 0.0225785 ,  0.04591135,  0.09208689,  0.18237695,  0.36445491],
[ 0.04460906,  0.09086889,  0.18237695,  0.36609277,  0.73152922],
[ 0.08923649,  0.18165604,  0.36445491,  0.73152922,  1.46680958]])
fun: 3.179561068096293e-14
x: array([ 1.        ,  0.99999998,  0.99999996,  0.99999992,  0.99999983])
message: 'Optimization terminated successfully.'
jac: array([  4.47207141e-06,   1.30357917e-06,  -1.86454207e-07,
-2.00564982e-06,   4.98799446e-07])
``````

``````def rosen_hess(x):
x = np.asarray(x)
H = np.diag(-400*x[:-1],1) - np.diag(400*x[:-1],-1)
diagonal = np.zeros_like(x)
diagonal[0] = 1200*x[0]**2-400*x[1]+2
diagonal[-1] = 200
diagonal[1:-1] = 202 + 1200*x[1:-1]**2 - 400*x[2:]
H = H + np.diag(diagonal)
return H
``````
``````res = opt.minimize(rosen, x_0, method='Newton-CG', jac=rosen_der, hess=rosen_hess, options={'xtol': 1e-8, 'disp': True})
print "Result of minimizing Rosenbrock function via Newton-Conjugate-Gradient algorithm (Hessian):"
print res

Optimization terminated successfully.
Current function value: 0.000000
Iterations: 20
Function evaluations: 22
Hessian evaluations: 20
Result of minimizing Rosenbrock function via Newton-Conjugate-Gradient algorithm (Hessian):
status: 0
success: True
njev: 41
nfev: 22
fun: 1.47606641102778e-19
x: array([ 1.,  1.,  1.,  1.,  1.])
message: 'Optimization terminated successfully.'
nhev: 20
jac: array([ -3.62847530e-11,   2.68148992e-09,   1.16637362e-08,
4.81693414e-08,  -2.76999090e-08])
``````

``````def rosen_hess_p(x, p):
x = np.asarray(x)
Hp = np.zeros_like(x)
Hp[0] = (1200*x[0]**2 - 400*x[1] + 2)*p[0] - 400*x[0]*p[1]
Hp[1:-1] = -400*x[:-2]*p[:-2]+(202+1200*x[1:-1]**2-400*x[2:])*p[1:-1] \
-400*x[1:-1]*p[2:]
Hp[-1] = -400*x[-2]*p[-2] + 200*p[-1]
return Hp

res = opt.minimize(rosen, x_0, method='Newton-CG', jac=rosen_der, hessp=rosen_hess_p, options={'xtol': 1e-8, 'disp': True})
print "Result of minimizing Rosenbrock function via Newton-Conjugate-Gradient algorithm (Hessian times arbitrary vector):"
print res

Optimization terminated successfully.
Current function value: 0.000000
Iterations: 20
Function evaluations: 22
Hessian evaluations: 58
Result of minimizing Rosenbrock function via Newton-Conjugate-Gradient algorithm (Hessian times arbitrary vector):
status: 0
success: True
njev: 41
nfev: 22
fun: 1.47606641102778e-19
x: array([ 1.,  1.,  1.,  1.,  1.])
message: 'Optimization terminated successfully.'
nhev: 58
jac: array([ -3.62847530e-11,   2.68148992e-09,   1.16637362e-08,
4.81693414e-08,  -2.76999090e-08])
``````

### 3.2 约束优化问题

``````def func(x, sign=1.0):
""" Objective function """
return sign*(2*x[0]*x[1] + 2*x[0] - x[0]**2 - 2*x[1]**2)

def func_deriv(x, sign=1.0):
""" Derivative of objective function """
dfdx0 = sign*(-2*x[0] + 2*x[1] + 2)
dfdx1 = sign*(2*x[0] - 4*x[1])
return np.array([ dfdx0, dfdx1 ])
``````

``````cons = ({'type': 'eq',  'fun': lambda x: np.array([x[0]**3 - x[1]]), 'jac': lambda x: np.array([3.0*(x[0]**2.0), -1.0])},
{'type': 'ineq', 'fun': lambda x: np.array([x[1] - 1]), 'jac': lambda x: np.array([0.0, 1.0])})
``````

``````res = opt.minimize(func, [-1.0, 1.0], args=(-1.0,), jac=func_deriv, method='SLSQP', options={'disp': True})
print "Result of unconstrained optimization:"
print res
res = opt.minimize(func, [-1.0, 1.0], args=(-1.0,), jac=func_deriv, constraints=cons, method='SLSQP', options={'disp': True})
print "Result of constrained optimization:"
print res

Optimization terminated successfully.    (Exit mode 0)
Current function value: -2.0
Iterations: 4
Function evaluations: 5
Result of unconstrained optimization:
status: 0
success: True
njev: 4
nfev: 5
fun: -1.9999999999999996
x: array([ 2.,  1.])
message: 'Optimization terminated successfully.'
jac: array([ -2.22044605e-16,  -0.00000000e+00,   0.00000000e+00])
nit: 4
Optimization terminated successfully.    (Exit mode 0)
Current function value: -1.00000018311
Iterations: 9
Function evaluations: 14
Result of constrained optimization:
status: 0
success: True
njev: 9
nfev: 14
fun: -1.0000001831052137
x: array([ 1.00000009,  1.        ])
message: 'Optimization terminated successfully.'
jac: array([-1.99999982,  1.99999982,  0.        ])
nit: 9
``````

SciPy中的优化模块还有一些特殊定制的函数，专门处理能够转化为优化求解的一些问题，如方程求根、最小方差拟合等，可到SciPy优化部分的指引页面查看。