期权高频数据准备
本 notebook 根据指定的时间区间整理期权高频数据,并保存为 option_data.csv 文件;
请与《期权市场一周纵览》notebook 配合使用。
import pandas as pd
import numpy as np
# Render floats in pandas output with four decimal places.
# NOTE(review): in the spec ',>.4f' the ',' is parsed as the fill character
# of a right-alignment with no width, so it emits no padding and no
# separators; if thousands separators were intended the spec would be
# ',.4f' -- confirm before changing.
pd.options.display.float_format = '{:,>.4f}'.format
# Calendar object for 'China.SSE'. `Calendar` is not imported in this file;
# presumably it is injected by the notebook environment -- TODO confirm.
calendar = Calendar('China.SSE')
class _format_checker:
    """Decode an option contract code into (type, expiry date, strike).

    The code is read positionally: character 6 is the call/put letter,
    characters 7-8 the two-digit year, characters 9-10 the month, and the
    trailing five characters the strike in thousandths.
    NOTE(review): this matches codes shaped like '510050C1503M02200';
    confirm against the exchange's contract-code specification.
    """

    def __init__(self, calendar):
        # Calendar used to roll the nominal expiry onto a business day.
        self.calendar = calendar

    def _parse_fields(self, instrumentID):
        """Return (contractType, year, month, strike) parsed from the code.

        Pure string parsing; does not touch the calendar.
        """
        contractType = instrumentID[6] + 'O'
        contractYear = int(instrumentID[7:9]) + 2000
        contractMonth = int(instrumentID[9:11])
        # Read the whole five-digit strike field. Taking only the last four
        # characters silently drops the leading digit for strikes >= 10.000.
        contractStrike = float(instrumentID[-5:]) / 1000.0
        return contractType, contractYear, contractMonth, contractStrike

    def _format_check(self, instrumentID):
        """Return (contractType, expiry Date, strike) for ``instrumentID``.

        The expiry is the fourth Wednesday of the contract month, moved by
        the calendar with the ``Following`` business-day convention.
        """
        contractType, contractYear, contractMonth, contractStrike = \
            self._parse_fields(instrumentID)
        contractExp = Date.NthWeekDay(4, Wednesday, contractMonth, contractYear)
        contractExp = self.calendar.adjustDate(contractExp, BizDayConvention.Following)
        return contractType, contractExp, contractStrike
checker = _format_checker(calendar)
# Business days of the study window, as returned by the calendar.
tradingDays = calendar.bizDatesList(Date(2015, 3, 5), Date(2015, 3, 12))

# Take ONE snapshot and keep one row per optionId, so that each option id
# stays paired with the instrument code from the same row. Calling the
# snapshot twice and joining two independent .unique() arrays by position
# only works if both happen to have the same length and order.
# assumes OptionsDataSnapShot() returns a pandas DataFrame -- TODO confirm
snapshot = OptionsDataSnapShot()
contracts = snapshot.drop_duplicates(subset=['optionId'])
names = contracts['optionId'].values
instrumentIDs = pd.Series(contracts['instrumentID'].values)

# One (contractType, expDate, strikePrice) tuple per contract, in the same
# order as `names`.
decoded = [checker._format_check(code) for code in instrumentIDs]
data = pd.DataFrame(names, columns=['optionId'])
data = data.join(pd.DataFrame(decoded, columns=['contractType', 'expDate', 'strikePrice']))
data[:5]
 | optionId | contractType | expDate | strikePrice |
---|---|---|---|---|
0 | 10000001 | CO | March 25th, 2015 | 2.2000 |
1 | 10000002 | CO | March 25th, 2015 | 2.2500 |
2 | 10000003 | CO | March 25th, 2015 | 2.3000 |
3 | 10000004 | CO | March 25th, 2015 | 2.3500 |
4 | 10000005 | CO | March 25th, 2015 | 2.4000 |
# Compact 'YYYYMMDD' strings for the trading days: the ISO form of each
# date with the '-' separators removed.
tradingDaysStr = [tradingDay.toISO().replace('-', '') for tradingDay in tradingDays]
tradingDaysStr
['20150305', '20150306', '20150309', '20150310', '20150311']
def _align_spot_prices(spotOffsetsMs, spotPrices, optionOffsets):
    """Return, for each option tick, the price of the latest earlier spot tick.

    spotOffsetsMs -- spot tick offsets, already scaled to the option ticks'
                     unit; assumed sorted ascending -- TODO confirm.
    spotPrices    -- spot last prices, same length as spotOffsetsMs (non-empty).
    optionOffsets -- offsets of the option ticks.

    An option tick that precedes every spot tick gets the first spot price.
    An option tick later than every spot tick gets the LAST spot price; the
    previous hand-written scan stopped one row short in that case.
    """
    spotPrices = np.asarray(spotPrices, dtype=float)
    # Number of spot ticks strictly earlier than each option tick.
    earlierCount = np.searchsorted(spotOffsetsMs, optionOffsets, side='left')
    position = np.clip(earlierCount - 1, 0, len(spotPrices) - 1)
    return spotPrices[position]


res = pd.DataFrame()
spotData = []
frames = []  # per-(day, option) tick frames, concatenated once at the end
for day in tradingDaysStr:
    previousSpot = spotData
    try:
        # NOTE(review): 'datasTime' looks like a typo for 'dataTime'; the
        # column is not used below, so the request is left unchanged.
        spotData = DataAPI.MktTicksHistOneDayGet('510050.XSHG', date = day, field = ['dataDate', 'datasTime', 'secOffset', 'lastPrice'])
        # Discard the row labelled 0 of the day's spot ticks.
        spotData = spotData.drop(0)
    except Exception as e:
        print(e)
        # Best effort: fall back to the most recent spot data fetched.
        spotData = previousSpot

    if len(spotData) == 0:
        # Nothing to align against (no spot data fetched so far).
        print(day + ' skipped: no spot data available')
        continue

    # Spot offsets are scaled by 1000 before being compared with option
    # offsets (presumably seconds vs. milliseconds -- TODO confirm).
    spotOffsetsMs = spotData['secOffset'].values * 1000
    spotPrices = spotData['lastPrice'].values

    for opt in names:
        try:
            sample = DataAPI.MktOptionTicksHistOneDayGet(optionId = opt, date = day)
            sample = sample.drop_duplicates(['secOffset'])
            sample['spotPrice'] = _align_spot_prices(spotOffsetsMs, spotPrices, sample['secOffset'].values)
            frames.append(sample)
        except Exception as e:
            # Typically "No Data Returned" for contracts without ticks that day.
            print(e)
    print(day + ' finished!')

if frames:
    # A single concat replaces repeated DataFrame.append calls.
    res = pd.concat(frames)
20150305 finished!
-1:No Data Returned for request: /market/getOptionTicksHistOneDay.csv?field=&optionId=10000030&date=20150306&startSecOffset=&endSecOffset=
-1:No Data Returned for request: /market/getOptionTicksHistOneDay.csv?field=&optionId=10000032&date=20150306&startSecOffset=&endSecOffset=
-1:No Data Returned for request: /market/getOptionTicksHistOneDay.csv?field=&optionId=10000033&date=20150306&startSecOffset=&endSecOffset=
-1:No Data Returned for request: /market/getOptionTicksHistOneDay.csv?field=&optionId=10000035&date=20150306&startSecOffset=&endSecOffset=
-1:No Data Returned for request: /market/getOptionTicksHistOneDay.csv?field=&optionId=10000054&date=20150306&startSecOffset=&endSecOffset=
-1:No Data Returned for request: /market/getOptionTicksHistOneDay.csv?field=&optionId=10000056&date=20150306&startSecOffset=&endSecOffset=
20150306 finished!
20150309 finished!
-1:No Data Returned for request: /market/getOptionTicksHistOneDay.csv?field=&optionId=10000039&date=20150310&startSecOffset=&endSecOffset=
-1:No Data Returned for request: /market/getOptionTicksHistOneDay.csv?field=&optionId=10000056&date=20150310&startSecOffset=&endSecOffset=
-1:No Data Returned for request: /market/getOptionTicksHistOneDay.csv?field=&optionId=10000064&date=20150310&startSecOffset=&endSecOffset=
20150310 finished!
-1:No Data Returned for request: /market/getOptionTicksHistOneDay.csv?field=&optionId=10000039&date=20150311&startSecOffset=&endSecOffset=
-1:No Data Returned for request: /market/getOptionTicksHistOneDay.csv?field=&optionId=10000064&date=20150311&startSecOffset=&endSecOffset=
20150311 finished!
# Attach contract attributes (type, expiry, strike) to every tick row.
res.optionId = res.optionId.astype('str')
res = res.merge(data, how = 'left', on = 'optionId')

# Convert the volume column into per-tick increments: a row that has the
# same (dataDate, optionId) as the row just before it gets the difference
# to that row's volume; any other row (the first of a run) keeps its
# volume unchanged. Done with shifted comparisons instead of a Python loop.
sameRun = (res.dataDate == res.dataDate.shift()) & (res.optionId == res.optionId.shift())
previousVolume = res['volume'].shift()
res['volume'] = np.where(sameRun, res['volume'] - previousVolume, res['volume'])
# Keep a converted copy of each expiry, produced by the Date's toDateTime().
res['pdDateTime'] = res.expDate.apply(lambda expiry: expiry.toDateTime())

# Assemble the pricing inputs, one row per tick.
optData = pd.DataFrame()
optData['contractType'] = res['contractType']
# 'YYYY-MM-DD' strings -> Date objects.
dateParts = [text.split('-') for text in res['dataDate']]
optData['valuationDate'] = [Date(int(parts[0]), int(parts[1]), int(parts[2])) for parts in dateParts]
for column in ('expDate', 'strikePrice', 'lastPrice'):
    optData[column] = res[column]
optData['optionId'] = res['optionId'].astype('str')

# Default every row to a call, then flip the rows whose type is 'PO'.
optData['Type'] = Option.Call
optData['spotPrice'] = res.spotPrice
isPut = optData['contractType'] == 'PO'
optData.loc[isPut, 'Type'] = Option.Put

# Time to maturity as an Actual/365 (Fixed) year fraction.
dc = DayCounter('Actual/365 (Fixed)')
optData['ttm'] = [
    dc.yearFraction(valuation, expiry)
    for valuation, expiry in zip(optData['valuationDate'], optData['expDate'])
]

# Implied volatility from the last, best-bid and best-ask prices. The two
# 0.0 arguments are presumably the rate and dividend inputs -- TODO confirm
# against the BSMImpliedVolatity signature.
quotedPrices = [
    ('lastPrice(vol)', optData['lastPrice']),
    ('bid1(vol)', res.bidPrice1),
    ('ask1(vol)', res.askPrice1),
]
for volColumn, prices in quotedPrices:
    optData[volColumn] = BSMImpliedVolatity(optData['Type'], optData['strikePrice'], optData['spotPrice'], 0.0, 0.0, optData['ttm'], prices)

# NOTE(review): `res` already has a 'spotPrice' column, so this index merge
# yields suffixed duplicates (pandas defaults '_x'/'_y'); kept as-is because
# the saved CSV's column names may be relied on elsewhere.
volColumns = [u'spotPrice', u'ttm', u'lastPrice(vol)', u'bid1(vol)', u'ask1(vol)']
res1 = res.merge(optData[volColumns], left_index=True, right_index=True)
# Drop every row that has a missing value in any column.
res1 = res1.dropna(how = 'any')

# Quote spreads, scaled by 10000: in price terms and in implied-vol terms.
priceSpread = res1.askPrice1 - res1.bidPrice1
res1['bidAskSpread(bps)'] = priceSpread * 10000
volSpread = res1['ask1(vol)'] - res1['bid1(vol)']
res1['bidAskSpread(vol bps)'] = volSpread * 10000

res1.to_csv('option_data.csv')