程序代写代做代考 compiler python stock-pred-checkpoint
stock-pred-checkpoint
In [1]:
import pandas as pd
import numpy as np
from scipy import interp
import matplotlib.pyplot as plt
from sklearn.cross_validation import StratifiedKFold, KFold
from sklearn import linear_model
from sklearn import svm
from sklearn.metrics import roc_curve, auc
from sklearn import preprocessing
import datetime
In [2]:
train = pd.read_csv(‘TrainingData.csv’)
test = pd.read_csv(‘ResultData.csv’)
/Users/vagrant/anaconda42/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (1,2,3,4) have mixed types. Specify dtype option on import or set low_memory=False.
interactivity=interactivity, compiler=compiler, result=result)
In [3]:
train
Out[3]:
Timestamp Variable142OPEN Variable142HIGH Variable142LOW Variable142LAST Variable143OPEN Variable143HIGH Variable143LOW Variable143LAST Variable144OPEN … Variable137LOW Variable137LAST_PRICE Variable139OPEN Variable139HIGH Variable139LOW Variable139LAST_PRICE Variable141OPEN Variable141HIGH Variable141LOW Variable141LAST_PRICE
0 40182.395833 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.061874 21.206487 41.041731 42.338085 41.041731 42.108253 2.133044 2.174362 2.117550 2.117550
1 40182.399306 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.206487 21.258134 42.108253 42.108253 41.881004 42.033364 2.117550 2.122715 2.117550 2.117550
2 40182.402778 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.268464 21.268464 42.033364 42.260614 41.803533 41.881004 2.122715 2.122715 2.112385 2.112385
3 40182.406250 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.268464 21.309782 42.033364 42.185725 42.033364 42.108253 2.122715 2.122715 2.099473 2.099473
4 40182.409722 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.278794 21.309782 42.108253 42.185725 42.033364 42.108253 2.099473 2.099473 2.094308 2.099473
5 40182.413194 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.309782 21.309782 42.033364 42.033364 41.881004 41.881004 2.099473 2.099473 2.089144 2.099473
6 40182.416667 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.278794 21.278794 41.803533 41.881004 41.728644 41.728644 2.089144 2.089144 2.083979 2.083979
7 40182.420139 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.309782 21.351100 41.728644 41.728644 41.651172 41.728644 2.083979 2.089144 2.083979 2.083979
8 40182.423611 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.351100 21.392418 41.728644 41.728644 41.651172 41.651172 2.083979 2.104638 2.083979 2.099473
9 40182.427083 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.330441 21.340771 41.651172 41.728644 41.498812 41.576283 2.099473 2.104638 2.099473 2.104638
10 40182.430556 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.340771 21.351100 41.576283 41.576283 41.423923 41.498812 2.104638 2.104638 2.099473 2.099473
11 40182.434028 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.351100 21.382089 41.423923 41.498812 41.423923 41.498812 2.099473 2.104638 2.099473 2.104638
12 40182.437500 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.382089 21.382089 41.423923 41.498812 41.423923 41.498812 2.107220 2.117550 2.107220 2.112385
13 40182.440972 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.382089 21.382089 41.498812 41.576283 41.498812 41.498812 2.112385 2.122715 2.112385 2.117550
14 40182.444444 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.371759 21.382089 41.498812 41.576283 41.498812 41.576283 2.117550 2.138209 2.117550 2.138209
15 40182.447917 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.371759 21.371759 41.576283 41.576283 41.498812 41.576283 2.143374 2.143374 2.133044 2.133044
16 40182.451389 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.361430 21.361430 41.576283 41.576283 41.576283 41.576283 2.133044 2.148538 2.133044 2.148538
17 40182.454861 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.361430 21.371759 41.576283 41.576283 41.498812 41.498812 2.148538 2.164033 2.143374 2.148538
18 40182.458333 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.340771 21.340771 41.498812 41.576283 41.498812 41.576283 2.148538 2.148538 2.148538 2.148538
19 40182.461806 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.320112 21.320112 41.576283 41.576283 41.576283 41.576283 2.148538 2.164033 2.148538 2.164033
20 40182.465278 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.299453 21.299453 41.576283 41.576283 41.498812 41.576283 2.164033 2.169197 2.164033 2.169197
21 40182.468750 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.268464 21.268464 41.498812 41.576283 41.498812 41.576283 2.169197 2.169197 2.169197 2.169197
22 40182.472222 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.268464 21.268464 41.423923 41.423923 41.346452 41.423923 2.169197 2.169197 2.164033 2.169197
23 40182.475694 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.185828 21.196157 41.423923 41.423923 41.346452 41.423923 2.169197 2.169197 2.169197 2.169197
24 40182.479167 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.185828 21.196157 41.423923 41.423923 41.271563 41.271563 2.169197 2.169197 2.169197 2.169197
25 40182.482639 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.165169 21.165169 41.271563 41.271563 41.194092 41.194092 2.169197 2.169197 2.164033 2.164033
26 40182.486111 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.165169 21.175498 41.194092 41.271563 41.194092 41.271563 2.164033 2.169197 2.164033 2.169197
27 40182.489583 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.154839 21.154839 41.271563 41.271563 41.271563 41.271563 2.169197 2.169197 2.169197 2.169197
28 40182.493056 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.154839 21.154839 41.271563 41.271563 41.194092 41.271563 2.169197 2.169197 2.169197 2.169197
29 40182.496528 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 21.134180 21.134180 41.271563 41.271563 41.194092 41.271563 2.169197 2.174362 2.169197 2.174362
… … … … … … … … … … … … … … … … … … … … … …
5892 40289.555556 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.662328 23.662328 47.363392 47.363392 47.285921 47.363392 3.767689 3.778019 3.762525 3.762525
5893 40289.559028 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.662328 23.693317 47.363392 47.363392 47.285921 47.363392 3.762525 3.772854 3.762525 3.772854
5894 40289.562500 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.713976 23.765623 47.363392 47.363392 47.285921 47.363392 3.772854 3.783184 3.772854 3.783184
5895 40289.565972 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.734635 23.734635 47.363392 47.515753 47.363392 47.515753 3.783184 3.783184 3.772854 3.778019
5896 40289.569444 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.734635 23.755294 47.515753 47.515753 47.438281 47.515753 3.778019 3.783184 3.772854 3.778019
5897 40289.572917 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.755294 23.817271 47.515753 47.668113 47.363392 47.668113 3.778019 3.793513 3.778019 3.783184
5898 40289.576389 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.817271 23.848259 47.668113 47.668113 47.590641 47.668113 3.783184 3.788348 3.783184 3.788348
5899 40289.579861 0 0 0 0 0.0 0.0 0.0 0.0 0.0 … 23.837930 23.848259 47.668113 47.743002 47.590641 47.743002 3.788348 3.793513 3.788348 3.793513
5900 40289.583333 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.817271 23.817271 47.743002 47.743002 47.668113 47.668113 3.793513 3.793513 3.783184 3.783184
5901 40289.586806 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.817271 23.817271 47.668113 47.743002 47.668113 47.668113 3.783184 3.783184 3.772854 3.772854
5902 40289.590278 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.796612 23.848259 47.668113 47.743002 47.668113 47.743002 3.772854 3.778019 3.767689 3.767689
5903 40289.593750 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.786282 23.806941 47.743002 47.743002 47.590641 47.590641 3.767689 3.772854 3.747030 3.747030
5904 40289.597222 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.744964 23.755294 47.668113 47.668113 47.438281 47.438281 3.747030 3.747030 3.736701 3.736701
5905 40289.600694 NaN NaN NaN NaN 0.0 0.0 0.0 0.0 NaN … 23.682987 23.682987 47.438281 47.438281 47.363392 47.363392 3.736701 3.736701 3.726371 3.731536
5906 40289.604167 0 0 0 0 0.0 0.0 0.0 0.0 NaN … 23.651999 23.662328 47.363392 47.363392 47.211032 47.285921 3.731536 3.741866 3.726371 3.741866
5907 40289.607639 NaN NaN NaN NaN NaN NaN NaN NaN 0.0 … 23.610681 23.641669 47.211032 47.285921 47.211032 47.211032 3.731536 3.731536 3.721206 3.726371
5908 40289.611111 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.641669 23.672658 47.211032 47.211032 47.133561 47.133561 3.726371 3.741866 3.726371 3.736701
5909 40289.614583 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.631340 23.713976 47.058672 47.133561 46.981200 46.981200 3.736701 3.741866 3.731536 3.741866
5910 40289.618056 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.713976 23.713976 46.981200 46.981200 46.906311 46.906311 3.741866 3.762525 3.741866 3.762525
5911 40289.621528 0 0 0 0 0.0 0.0 0.0 0.0 0.0 … 23.713976 23.786282 46.906311 46.906311 46.906311 46.906311 3.762525 3.767689 3.757360 3.767689
5912 40289.625000 NaN NaN NaN NaN 0.0 0.0 0.0 0.0 NaN … 23.755294 23.755294 47.133561 47.438281 47.133561 47.285921 3.767689 3.778019 3.757360 3.757360
5913 40289.628472 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.744964 23.765623 47.285921 47.438281 47.285921 47.438281 3.762525 3.767689 3.762525 3.767689
5914 40289.631944 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.662328 23.682987 47.438281 47.438281 47.363392 47.363392 3.767689 3.767689 3.762525 3.767689
5915 40289.635417 0 0 0 0 NaN NaN NaN NaN NaN … 23.682987 23.703646 47.438281 47.438281 47.363392 47.438281 3.767689 3.788348 3.767689 3.783184
5916 40289.638889 0 0 0 0 NaN NaN NaN NaN 0.0 … 23.693317 23.703646 47.438281 47.438281 47.363392 47.363392 3.783184 3.783184 3.762525 3.767689
5917 40289.642361 NaN NaN NaN NaN NaN NaN NaN NaN 0.0 … 23.682987 23.693317 47.363392 47.438281 47.285921 47.363392 3.767689 3.778019 3.767689 3.778019
5918 40289.645833 0 0 0 0 0.0 0.0 0.0 0.0 0.0 … 23.693317 23.744964 47.363392 47.515753 47.363392 47.438281 3.778019 3.783184 3.772854 3.783184
5919 40289.649306 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.755294 23.796612 47.515753 47.590641 47.363392 47.363392 3.788348 3.788348 3.783184 3.788348
5920 40289.652778 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.775953 23.920566 47.363392 47.438281 47.363392 47.438281 3.788348 3.793513 3.788348 3.793513
5921 40289.656250 NaN NaN NaN NaN NaN NaN NaN NaN NaN … 23.941225 23.941225 47.438281 47.438281 47.363392 47.363392 3.793513 3.793513 3.783184 3.783184
5922 rows × 610 columns
In [4]:
allExist = ~(train.isnull().any())
testAllExist = ~(test.isnull().any())
cols = []
for colname in test.columns:
if allExist[colname] and testAllExist[colname]:
cols.append(colname)
In [5]:
cols
Out[5]:
[‘Timestamp’,
‘Variable159OPEN’,
‘Variable159HIGH’,
‘Variable159LOW’,
‘Variable159LAST’,
‘Variable164OPEN’,
‘Variable164HIGH’,
‘Variable164LOW’,
‘Variable164LAST’,
‘Variable8OPEN’,
‘Variable8HIGH’,
‘Variable8LOW’,
‘Variable8LAST_PRICE’,
‘Variable9OPEN’,
‘Variable9HIGH’,
‘Variable9LOW’,
‘Variable9LAST_PRICE’,
‘Variable10OPEN’,
‘Variable10HIGH’,
‘Variable10LOW’,
‘Variable10LAST_PRICE’,
‘Variable11OPEN’,
‘Variable11HIGH’,
‘Variable11LOW’,
‘Variable11LAST_PRICE’,
‘Variable12OPEN’,
‘Variable12HIGH’,
‘Variable12LOW’,
‘Variable12LAST_PRICE’,
‘Variable13OPEN’,
‘Variable13HIGH’,
‘Variable13LOW’,
‘Variable13LAST_PRICE’,
‘Variable14OPEN’,
‘Variable14HIGH’,
‘Variable14LOW’,
‘Variable14LAST_PRICE’,
‘Variable15OPEN’,
‘Variable15HIGH’,
‘Variable15LOW’,
‘Variable15LAST_PRICE’,
‘Variable17OPEN’,
‘Variable17HIGH’,
‘Variable17LOW’,
‘Variable17LAST_PRICE’,
‘Variable18OPEN’,
‘Variable18HIGH’,
‘Variable18LOW’,
‘Variable18LAST_PRICE’,
‘Variable19OPEN’,
‘Variable19HIGH’,
‘Variable19LOW’,
‘Variable19LAST_PRICE’,
‘Variable20OPEN’,
‘Variable20HIGH’,
‘Variable20LOW’,
‘Variable20LAST_PRICE’,
‘Variable21OPEN’,
‘Variable21HIGH’,
‘Variable21LOW’,
‘Variable21LAST_PRICE’,
‘Variable22OPEN’,
‘Variable22HIGH’,
‘Variable22LOW’,
‘Variable22LAST_PRICE’,
‘Variable23OPEN’,
‘Variable23HIGH’,
‘Variable23LOW’,
‘Variable23LAST_PRICE’,
‘Variable24OPEN’,
‘Variable24HIGH’,
‘Variable24LOW’,
‘Variable24LAST_PRICE’,
‘Variable25OPEN’,
‘Variable25HIGH’,
‘Variable25LOW’,
‘Variable25LAST_PRICE’,
‘Variable26OPEN’,
‘Variable26HIGH’,
‘Variable26LOW’,
‘Variable26LAST_PRICE’,
‘Variable27OPEN’,
‘Variable27HIGH’,
‘Variable27LOW’,
‘Variable27LAST_PRICE’,
‘Variable28OPEN’,
‘Variable28HIGH’,
‘Variable28LOW’,
‘Variable28LAST_PRICE’,
‘Variable29OPEN’,
‘Variable29HIGH’,
‘Variable29LOW’,
‘Variable29LAST_PRICE’,
‘Variable30OPEN’,
‘Variable30HIGH’,
‘Variable30LOW’,
‘Variable30LAST_PRICE’,
‘Variable31OPEN’,
‘Variable31HIGH’,
‘Variable31LOW’,
‘Variable31LAST_PRICE’,
‘Variable32OPEN’,
‘Variable32HIGH’,
‘Variable32LOW’,
‘Variable32LAST_PRICE’,
‘Variable33OPEN’,
‘Variable33HIGH’,
‘Variable33LOW’,
‘Variable33LAST_PRICE’,
‘Variable34OPEN’,
‘Variable34HIGH’,
‘Variable34LOW’,
‘Variable34LAST_PRICE’,
‘Variable35OPEN’,
‘Variable35HIGH’,
‘Variable35LOW’,
‘Variable35LAST_PRICE’,
‘Variable36OPEN’,
‘Variable36HIGH’,
‘Variable36LOW’,
‘Variable36LAST_PRICE’,
‘Variable37OPEN’,
‘Variable37HIGH’,
‘Variable37LOW’,
‘Variable37LAST_PRICE’,
‘Variable38OPEN’,
‘Variable38HIGH’,
‘Variable38LOW’,
‘Variable38LAST_PRICE’,
‘Variable40OPEN’,
‘Variable40HIGH’,
‘Variable40LOW’,
‘Variable40LAST_PRICE’,
‘Variable41OPEN’,
‘Variable41HIGH’,
‘Variable41LOW’,
‘Variable41LAST_PRICE’,
‘Variable42OPEN’,
‘Variable42HIGH’,
‘Variable42LOW’,
‘Variable42LAST_PRICE’,
‘Variable43OPEN’,
‘Variable43HIGH’,
‘Variable43LOW’,
‘Variable43LAST_PRICE’,
‘Variable44OPEN’,
‘Variable44HIGH’,
‘Variable44LOW’,
‘Variable44LAST_PRICE’,
‘Variable45OPEN’,
‘Variable45HIGH’,
‘Variable45LOW’,
‘Variable45LAST_PRICE’,
‘Variable46OPEN’,
‘Variable46HIGH’,
‘Variable46LOW’,
‘Variable46LAST_PRICE’,
‘Variable47OPEN’,
‘Variable47HIGH’,
‘Variable47LOW’,
‘Variable47LAST_PRICE’,
‘Variable48OPEN’,
‘Variable48HIGH’,
‘Variable48LOW’,
‘Variable48LAST_PRICE’,
‘Variable49OPEN’,
‘Variable49HIGH’,
‘Variable49LOW’,
‘Variable49LAST_PRICE’,
‘Variable50OPEN’,
‘Variable50HIGH’,
‘Variable50LOW’,
‘Variable50LAST_PRICE’,
‘Variable51OPEN’,
‘Variable51HIGH’,
‘Variable51LOW’,
‘Variable51LAST_PRICE’,
‘Variable52OPEN’,
‘Variable52HIGH’,
‘Variable52LOW’,
‘Variable52LAST_PRICE’,
‘Variable53OPEN’,
‘Variable53HIGH’,
‘Variable53LOW’,
‘Variable53LAST_PRICE’,
‘Variable54OPEN’,
‘Variable54HIGH’,
‘Variable54LOW’,
‘Variable54LAST_PRICE’,
‘Variable55OPEN’,
‘Variable55HIGH’,
‘Variable55LOW’,
‘Variable55LAST_PRICE’,
‘Variable56OPEN’,
‘Variable56HIGH’,
‘Variable56LOW’,
‘Variable56LAST_PRICE’,
‘Variable57OPEN’,
‘Variable57HIGH’,
‘Variable57LOW’,
‘Variable57LAST_PRICE’,
‘Variable58OPEN’,
‘Variable58HIGH’,
‘Variable58LOW’,
‘Variable58LAST_PRICE’,
‘Variable59OPEN’,
‘Variable59HIGH’,
‘Variable59LOW’,
‘Variable59LAST_PRICE’,
‘Variable60OPEN’,
‘Variable60HIGH’,
‘Variable60LOW’,
‘Variable60LAST_PRICE’,
‘Variable61OPEN’,
‘Variable61HIGH’,
‘Variable61LOW’,
‘Variable61LAST_PRICE’,
‘Variable62OPEN’,
‘Variable62HIGH’,
‘Variable62LOW’,
‘Variable62LAST_PRICE’,
‘Variable63OPEN’,
‘Variable63HIGH’,
‘Variable63LOW’,
‘Variable63LAST_PRICE’,
‘Variable64OPEN’,
‘Variable64HIGH’,
‘Variable64LOW’,
‘Variable64LAST_PRICE’,
‘Variable65OPEN’,
‘Variable65HIGH’,
‘Variable65LOW’,
‘Variable65LAST_PRICE’,
‘Variable68OPEN’,
‘Variable68HIGH’,
‘Variable68LOW’,
‘Variable68LAST_PRICE’,
‘Variable69OPEN’,
‘Variable69HIGH’,
‘Variable69LOW’,
‘Variable69LAST_PRICE’,
‘Variable70OPEN’,
‘Variable70HIGH’,
‘Variable70LOW’,
‘Variable70LAST_PRICE’,
‘Variable71OPEN’,
‘Variable71HIGH’,
‘Variable71LOW’,
‘Variable71LAST_PRICE’,
‘Variable72OPEN’,
‘Variable72HIGH’,
‘Variable72LOW’,
‘Variable72LAST_PRICE’,
‘Variable73OPEN’,
‘Variable73HIGH’,
‘Variable73LOW’,
‘Variable73LAST_PRICE’,
‘Variable74OPEN’,
‘Variable74HIGH’,
‘Variable74LOW’,
‘Variable74LAST_PRICE’,
‘Variable76OPEN’,
‘Variable76HIGH’,
‘Variable76LOW’,
‘Variable76LAST_PRICE’,
‘Variable77OPEN’,
‘Variable77HIGH’,
‘Variable77LOW’,
‘Variable77LAST_PRICE’,
‘Variable78OPEN’,
‘Variable78HIGH’,
‘Variable78LOW’,
‘Variable78LAST_PRICE’,
‘Variable79OPEN’,
‘Variable79HIGH’,
‘Variable79LOW’,
‘Variable79LAST_PRICE’,
‘Variable80OPEN’,
‘Variable80HIGH’,
‘Variable80LOW’,
‘Variable80LAST_PRICE’,
‘Variable81OPEN’,
‘Variable81HIGH’,
‘Variable81LOW’,
‘Variable81LAST_PRICE’,
‘Variable82OPEN’,
‘Variable82HIGH’,
‘Variable82LOW’,
‘Variable82LAST_PRICE’,
‘Variable83OPEN’,
‘Variable83HIGH’,
‘Variable83LOW’,
‘Variable83LAST_PRICE’,
‘Variable85OPEN’,
‘Variable85HIGH’,
‘Variable85LOW’,
‘Variable85LAST_PRICE’,
‘Variable86OPEN’,
‘Variable86HIGH’,
‘Variable86LOW’,
‘Variable86LAST_PRICE’,
‘Variable87OPEN’,
‘Variable87HIGH’,
‘Variable87LOW’,
‘Variable87LAST_PRICE’,
‘Variable88OPEN’,
‘Variable88HIGH’,
‘Variable88LOW’,
‘Variable88LAST_PRICE’,
‘Variable89OPEN’,
‘Variable89HIGH’,
‘Variable89LOW’,
‘Variable89LAST_PRICE’,
‘Variable90OPEN’,
‘Variable90HIGH’,
‘Variable90LOW’,
‘Variable90LAST_PRICE’,
‘Variable91OPEN’,
‘Variable91HIGH’,
‘Variable91LOW’,
‘Variable91LAST_PRICE’,
‘Variable92OPEN’,
‘Variable92HIGH’,
‘Variable92LOW’,
‘Variable92LAST_PRICE’,
‘Variable93OPEN’,
‘Variable93HIGH’,
‘Variable93LOW’,
‘Variable93LAST_PRICE’,
‘Variable94OPEN’,
‘Variable94HIGH’,
‘Variable94LOW’,
‘Variable94LAST_PRICE’,
‘Variable95OPEN’,
‘Variable95HIGH’,
‘Variable95LOW’,
‘Variable95LAST_PRICE’,
‘Variable97OPEN’,
‘Variable97HIGH’,
‘Variable97LOW’,
‘Variable97LAST_PRICE’,
‘Variable98OPEN’,
‘Variable98HIGH’,
‘Variable98LOW’,
‘Variable98LAST_PRICE’,
‘Variable99OPEN’,
‘Variable99HIGH’,
‘Variable99LOW’,
‘Variable99LAST_PRICE’,
‘Variable100OPEN’,
‘Variable100HIGH’,
‘Variable100LOW’,
‘Variable100LAST_PRICE’,
‘Variable101OPEN’,
‘Variable101HIGH’,
‘Variable101LOW’,
‘Variable101LAST_PRICE’,
‘Variable102OPEN’,
‘Variable102HIGH’,
‘Variable102LOW’,
‘Variable102LAST_PRICE’,
‘Variable103OPEN’,
‘Variable103HIGH’,
‘Variable103LOW’,
‘Variable103LAST_PRICE’,
‘Variable105OPEN’,
‘Variable105HIGH’,
‘Variable105LOW’,
‘Variable105LAST_PRICE’,
‘Variable107OPEN’,
‘Variable107HIGH’,
‘Variable107LOW’,
‘Variable107LAST_PRICE’,
‘Variable108OPEN’,
‘Variable108HIGH’,
‘Variable108LOW’,
‘Variable108LAST_PRICE’,
‘Variable109OPEN’,
‘Variable109HIGH’,
‘Variable109LOW’,
‘Variable109LAST_PRICE’,
‘Variable111OPEN’,
‘Variable111HIGH’,
‘Variable111LOW’,
‘Variable111LAST_PRICE’,
‘Variable112OPEN’,
‘Variable112HIGH’,
‘Variable112LOW’,
‘Variable112LAST_PRICE’,
‘Variable113OPEN’,
‘Variable113HIGH’,
‘Variable113LOW’,
‘Variable113LAST_PRICE’,
‘Variable114OPEN’,
‘Variable114HIGH’,
‘Variable114LOW’,
‘Variable114LAST_PRICE’,
‘Variable115OPEN’,
‘Variable115HIGH’,
‘Variable115LOW’,
‘Variable115LAST_PRICE’,
‘Variable116OPEN’,
‘Variable116HIGH’,
‘Variable116LOW’,
‘Variable116LAST_PRICE’,
‘Variable117OPEN’,
‘Variable117HIGH’,
‘Variable117LOW’,
‘Variable117LAST_PRICE’,
‘Variable120OPEN’,
‘Variable120HIGH’,
‘Variable120LOW’,
‘Variable120LAST_PRICE’,
‘Variable121OPEN’,
‘Variable121HIGH’,
‘Variable121LOW’,
‘Variable121LAST_PRICE’,
‘Variable123OPEN’,
‘Variable123HIGH’,
‘Variable123LOW’,
‘Variable123LAST_PRICE’,
‘Variable124OPEN’,
‘Variable124HIGH’,
‘Variable124LOW’,
‘Variable124LAST_PRICE’,
‘Variable125OPEN’,
‘Variable125HIGH’,
‘Variable125LOW’,
‘Variable125LAST_PRICE’,
‘Variable126OPEN’,
‘Variable126HIGH’,
‘Variable126LOW’,
‘Variable126LAST_PRICE’,
‘Variable127OPEN’,
‘Variable127HIGH’,
‘Variable127LOW’,
‘Variable127LAST_PRICE’,
‘Variable129OPEN’,
‘Variable129HIGH’,
‘Variable129LOW’,
‘Variable129LAST_PRICE’,
‘Variable130OPEN’,
‘Variable130HIGH’,
‘Variable130LOW’,
‘Variable130LAST_PRICE’,
‘Variable133OPEN’,
‘Variable133HIGH’,
‘Variable133LOW’,
‘Variable133LAST_PRICE’,
‘Variable136OPEN’,
‘Variable136HIGH’,
‘Variable136LOW’,
‘Variable136LAST_PRICE’,
‘Variable137OPEN’,
‘Variable137HIGH’,
‘Variable137LOW’,
‘Variable137LAST_PRICE’,
‘Variable139OPEN’,
‘Variable139HIGH’,
‘Variable139LOW’,
‘Variable139LAST_PRICE’,
‘Variable141OPEN’,
‘Variable141HIGH’,
‘Variable141LOW’,
‘Variable141LAST_PRICE’]
In [6]:
trainX = train[cols]
trainY = train[‘TargetVariable’]
testX = test[cols]
In [7]:
trainY
Out[7]:
0 1
1 1
2 1
3 1
4 0
5 1
6 0
7 0
8 0
9 0
10 0
11 1
12 1
13 1
14 1
15 1
16 1
17 1
18 1
19 1
20 1
21 1
22 1
23 1
24 1
25 1
26 1
27 0
28 1
29 1
..
5892 1
5893 1
5894 1
5895 1
5896 1
5897 1
5898 1
5899 1
5900 1
5901 1
5902 1
5903 1
5904 0
5905 0
5906 0
5907 0
5908 0
5909 0
5910 0
5911 0
5912 1
5913 1
5914 1
5915 1
5916 1
5917 1
5918 1
5919 1
5920 1
5921 1
Name: TargetVariable, dtype: int64
In [8]:
zscoreScaler = preprocessing.StandardScaler()
normTrainX = pd.DataFrame(zscoreScaler.fit_transform(trainX))
normTestX = pd.DataFrame(zscoreScaler.transform(testX))
random_state = np.random.RandomState(0)
In [9]:
def cross_validation_AUC(trainX, trainY, numFold, classifier, name):
plt.figure()
plt.clf()
kfold = KFold(trainY.shape[0], n_folds=numFold, shuffle=True)
true_positive_rate_mean = 0.0
false_positive_rate_mean = np.linspace(0, 1, 100)
for i, (train_part, validate_part) in enumerate(kfold):
predictProbability = classifier.fit(trainX.values[train_part], trainY.values[train_part]).predict_proba(trainX.values[validate_part])
false_positive_rate, true_positive_rate, thresholds = roc_curve(trainY.values[validate_part], predictProbability[:, 1])
true_positive_rate_mean += interp(false_positive_rate_mean, false_positive_rate, true_positive_rate)
true_positive_rate_mean[0] = 0.0
area_under_curve = auc(false_positive_rate, true_positive_rate)
plt.plot(false_positive_rate, true_positive_rate, lw=1, label=’Fold %d ROC (AUC = %0.3f)’ % (i, area_under_curve))
plt.plot([0, 1], [0, 1], ‘–‘, color=(0.3, 0.3, 0.3))
true_positive_rate_mean /= len(kfold)
true_positive_rate_mean[-1] = 1.0
mean_auc = auc(false_positive_rate_mean, true_positive_rate_mean)
plt.plot(false_positive_rate_mean, true_positive_rate_mean, ‘k–‘,
label=’Mean ROC (AUC = %0.3f)’ % mean_auc, lw=2)
extra = 0.03
plt.xlim([-extra, 1 + extra])
plt.ylim([-extra, 1 + extra])
plt.xlabel(‘False Positive Rate’)
plt.ylabel(‘True Positive Rate’)
plt.title(‘Receiver operating characteristic for ‘ + name)
plt.legend(loc=”lower right”)
plt.savefig(name + str( datetime.datetime.now()) + “.png”)
plt.show()
In [10]:
def trainAndTest(trainX, trainY, testX, classfier, name):
classfier.fit(trainX, trainY)
testYProb = classfier.predict_proba(testX)
testY = classfier.predict(testX)
outProb = pd.DataFrame({“Timestamp”: test[‘Timestamp’], “Score”: testYProb[:,1]})
outPred = pd.DataFrame({“Timestamp”: test[‘Timestamp’], “Prediction”: testY})
f = open(name + ‘_prob_’ + str( datetime.datetime.now()) + ‘.csv’, ‘w’)
f.write(‘Timestamp,Score
’)
for i in range(outProb.shape[0]):
f.write(‘%f,%f
’ %(outProb[‘Timestamp’][i], outProb[‘Score’][i]))
f.close()
f = open(name + ‘_pred’ + str( datetime.datetime.now()) + ‘.csv’, ‘w’)
f.write(‘Timestamp,Score
’)
for i in range(outProb.shape[0]):
f.write(‘%f,%d
’ %(outProb[‘Timestamp’][i], outPred[‘Prediction’][i]))
f.close()
return outProb, outPred
In [11]:
classifiers = [linear_model.LogisticRegression(class_weight=’balanced’),
svm.SVC(kernel=’linear’, probability=True,
random_state=random_state, class_weight=’balanced’)]
names = [‘Logistic Regression’, “SVM”]
In [ ]:
cross_validation_AUC(trainX, trainY, 3, classifiers[0], names[0])
In [ ]:
cross_validation_AUC(normTrainX, trainY, 3, classifiers[1], names[1])
In [ ]:
trainAndTest(trainX, trainY, testX, classifiers[0], names[0])
In [ ]:
trainAndTest(normTrainX, trainY, normTestX, classifiers[1], names[1])
In [ ]: