#-*- coding: utf-8 -*-
from sklearn.ensemble import GradientBoostingClassifier
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix
from data_deal import data_deal
import xgboost as xgb
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
# Random stand-in inputs, kept for quick experiments without the real data set:
#data = np.random.rand(5000,10)
#label = np.random.randint(2, size=5000)

# Pull the inputs and their labels from the project-local helper
# (presumably a feature matrix plus a binary target -- confirm in data_deal).
data, label = data_deal()

# Hold out 30% of the rows; random_state pins the shuffle so that repeated
# runs produce the same partition.
x_train, x_test, y_train, y_test = train_test_split(
    data, label, test_size=0.3, random_state=0)

# Optional cap on the number of rows (disabled):
#data = data[:100000]
#label = label[:100000]

# Disabled alternative: obtain leaf indices from scikit-learn's GBDT instead
# of XGBoost. Left as an inert string literal, exactly as before.
'''
gbdt_model = GradientBoostingClassifier(n_estimators = 100)
gbdt_model.fit(data, label)
p = gbdt_model.apply(data)
'''
# ---- Stage 1: boosted trees with XGBoost -----------------------------------
# `dtrain` carries the labels and is used for fitting and for the training
# watch-list; `dtrain_x` wraps the same rows without labels and is only used
# to read leaf assignments back out of the fitted booster.
dtrain = xgb.DMatrix(x_train, label=y_train)
#dtest = xgb.DMatrix(data)
dtrain_x = xgb.DMatrix(x_train)

# Booster settings passed straight through to xgb.train.
param = {
    'booster': 'gbtree',
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'max_depth': 5,
    'lambda': 10,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 10,
    'eta': 0.1,
    'seed': 0,
    'nthread': 8,
    'silent': 1,
}

num_round = 300
# Only the training matrix is watched, so the AUC logged per round is a
# training-set AUC.
evallist = [(dtrain, 'train')]

bst = xgb.train(param, dtrain, num_boost_round=num_round, evals=evallist)
bst.save_model('xgb_test.model')
#bst.load_model('xgb_test.model')

# With pred_leaf=True the booster returns, for every row, the index of the
# leaf it lands in for each tree, rather than a probability.
p = bst.predict(dtrain_x, pred_leaf=True)
# ---- Stage 2: one-hot encode the leaf indices and fit a linear model --------
# Every column of `p` is treated as a categorical feature; the fitted encoder
# is persisted so the same mapping can be reused at inference time.
one_hot_encoder = OneHotEncoder()
one_hot_encoder.fit(p)
joblib.dump(one_hot_encoder, "one_hot_encoder.model")

# The encoder emits a sparse matrix. It is deliberately NOT densified with
# .toarray(): a dense copy needs rows x (total leaves) floats and can exhaust
# memory, while LogisticRegression accepts sparse input directly.
one_hot_encoder_feature = one_hot_encoder.transform(p)

lr_model = LogisticRegression()
lr_model.fit(one_hot_encoder_feature, y_train)
joblib.dump(lr_model, "lr_test.model")

# Hard class labels feed the report and the confusion matrix.
predict_label = lr_model.predict(one_hot_encoder_feature)
# ROC AUC is a ranking metric and must be computed from continuous scores.
# Feeding it thresholded 0/1 labels reduces the curve to a single operating
# point, so the probability of the second class (lr_model.classes_[1]) is
# used instead.
predict_score = lr_model.predict_proba(one_hot_encoder_feature)[:, 1]

# NOTE: these numbers are measured on the data the models were fitted on, so
# they are optimistic; the disabled snippet below scores the held-out split.
# The print form below emits the same text as `print 'train:', value` under
# Python 2 and also parses under Python 3.
print('train: %s' % (roc_auc_score(y_train, predict_score),))
print('train: %s' % (classification_report(y_train, predict_label),))
print('train: %s' % (confusion_matrix(y_train, predict_label),))

# Disabled hold-out evaluation: reloads the three persisted artefacts and
# scores x_test / y_test. Kept as an inert string literal; remove the quotes
# to run it.
'''
dtest_x = xgb.DMatrix(x_test)
bst_load = xgb.Booster({'nthread': 8}) # init model
bst_load.load_model('xgb_test.model')
p = bst_load.predict(dtest_x, pred_leaf=True)
one_hot_encoder_load = joblib.load('one_hot_encoder.model')
one_hot_encoder_feature = one_hot_encoder_load.transform(p)
lr_model_load = joblib.load('lr_test.model')
predict_label = lr_model_load.predict(one_hot_encoder_feature)
predict_score = lr_model_load.predict_proba(one_hot_encoder_feature)[:, 1]
print('test: %s' % (roc_auc_score(y_test, predict_score),))
print('test: %s' % (classification_report(y_test, predict_label),))
print('test: %s' % (confusion_matrix(y_test, predict_label),))
'''