# -*- coding: utf-8 -*-
"""
Created on Wed May 31 23:19:21 2023

@author: zhang lanxin
"""


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report, f1_score
import xgboost as xgb
import os

# Features: keep only the '2DTTC' column, as a one-column DataFrame.
X = pd.read_csv('Features-Exp.csv')
X = X[['2DTTC']]
# Targets (the 'Label' column of this frame is used further down).
Y = pd.read_csv('Abnormal-Exp.csv')


# NOTE: sometimes skipping the StandardScaler gives a better classification
# result, so the scaled copies below are kept separate from X_train / X_test.
from sklearn.preprocessing import Normalizer, MinMaxScaler

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=24)

# Fit the scaler on the training split only and reuse those statistics for the
# test split. Re-fitting on X_test (fit_transform) would scale the two splits
# with different means/variances and leak test-set statistics into the scaler.
standS = StandardScaler().fit(X_train)
X_train_std = standS.transform(X_train)
X_test_std = standS.transform(X_test)


import itertools

# Create a confusion matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it on the current matplotlib axes.

    NOTE: a second definition with the same name appears later in this file
    and replaces this one at run time.

    Parameters
    ----------
    cm : 2-D array-like
        Matrix to display; rows are labelled 'True label' and columns
        'Predicted label'.
    classes : sequence
        Tick labels, used for both axes.
    normalize : bool, default False
        When True, every row is divided by its row sum before display.
        Rows whose sum is zero are shown as all zeros instead of NaN.
    title : str
        Axes title.
    cmap : matplotlib colormap
        Colormap passed to ``plt.imshow``.

    Returns
    -------
    None
        The matrix is printed and drawn; nothing is returned.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Guarded division: an empty row would otherwise be 0/0 -> NaN, which
        # also turns the colour threshold below into NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title, fontsize=14)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells above half of the maximum get white text, the others black text.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')



# def predict_driving_performance(X, Y, predictType):
#     X_train, X_test,  y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=24)
#     X_test1, X_val, y_test1, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=24)
#     seed= 24
#     xgb1 = xgb.sklearn.XGBClassifier(
#         learning_rate =0.1,
#         n_estimators=10, #100
#         max_depth=5,
#         min_child_weight=11,
#         gamma=0.1,
#         subsample=0.8,
#         colsample_bytree=0.7,
#         objective='multi:softprob',
#         num_class= 2,
#         n_jobs= -1,
#         scale_pos_weight=1,
#         seed=seed)
#     #we will use xgboost to do the classification
#     xgb1.fit(X_train, y_train)
    
#     y_pred1 = xgb1.predict(X_val)
#     print(y_pred1)
#     #y_pred1 = int(y_pred)
#     #confusion matrix and classification report
    
#     cm = confusion_matrix(y_val, y_pred1)
    
#     print(classification_report(y_val, y_pred1))
    
#     labels = ['Anomaly','Normal']
#     fig = plt.figure(figsize=(11.963,9.3))
#     fig.add_subplot(211)
#     plot_confusion_matrix(cm, labels, title="Confusion Matrix---XGBoost", cmap=plt.cm.Greens)
#     #plt.savefig('XGboost1-Exp',dpi=600)
#     ###All test
#     y_pred2 = xgb1.predict(X_test)
#     print(y_pred2)
#     #confusion matrix and classification report
    
#     cm = confusion_matrix(y_test, y_pred2)
    
#     print(classification_report(y_test, y_pred2))
    
#     labels = ['Anomaly','Normal']
#     fig = plt.figure(figsize=(11.963,9.3))
#     fig.add_subplot(211)
#     plot_confusion_matrix(cm, labels, title="Confusion Matrix---XGBoost2", cmap=plt.cm.Greens)
#     #plt.savefig('XGboost2-Exp',dpi=600)
    
#     fig, ax = plt.subplots(figsize = (15, 10))
#     ind = np.arange(1) #8
#     width = 0.15
#     feature_importtance = xgb1.feature_importances_
#     rect1 = ax.bar(ind+width, feature_importtance)
#     ax.set_xticks(ind+width/2)
#     ax.set_xticklabels(('2DTTC'))
#     ax.set_ylabel('Importance')
#     ax.set_xlabel('Measurements')
#     ax.set_title('Relative Importance of various measurements to predict ' + predictType)
#     #plt.show()
#     #plt.savefig('Important features-Exp',dpi=600)

#Let's predict
#predict_driving_performance(X, Y, 'Abnormal driving behaviour')

#
# Split the rows into normal (Label == 0) and abnormal (Label != 0).
# The masks are 1-D and built from the 'Label' column only, so the row
# selection does not depend on Y holding exactly one column.
label_values = np.asarray(Y['Label'])
normal_index = label_values == 0
abnormal_index = label_values != 0
X_normal = X[normal_index]
X_outliers = X[abnormal_index]
y_normal = Y[normal_index]
y_outliers = Y[abnormal_index]

# 70/30 split of the normal rows only; the abnormal rows are never used for
# fitting and are all kept for evaluation.
X_normal_train, X_normal_valid, y_normal_train, y_normal_valid = train_test_split(
    X_normal, y_normal, test_size=0.30, random_state=24)

# Evaluation set: held-out normal rows followed by every abnormal row.
X_val = pd.concat([X_normal_valid, X_outliers], axis=0)
y_val_v = pd.concat([y_normal_valid, y_outliers], axis=0)

# Re-code the labels: 0 -> 1, 1 and 2 -> -1 (the predictions are compared
# against 1 / -1 below). Any other label value maps to NaN.
map_v = {0:1, 1:-1, 2:-1}
y_val_v1 = y_val_v['Label'].map(map_v)
y_trian_try = y_normal_train['Label'].map(map_v)
y_normal_valid_try = y_normal_valid['Label'].map(map_v)
y_all = Y['Label'].map(map_v)




# Fit an IsolationForest on the normal training rows and evaluate it on the
# held-out normal rows plus all abnormal rows.
from sklearn.ensemble import IsolationForest
import time
print('IsolationForest:')

rng = np.random.RandomState(39)
clf = IsolationForest(n_estimators = 10, max_samples= 'auto', random_state=rng, contamination= 'auto')

# Result accumulators; each evaluated model appends one entry to each list.
t = []
f1score = []
cm_record = []
classification_report_record = []
FPR_record = []
TPR_record = []
ACC_record = []

# Time the fit only.
t0 = time.time()
clf.fit(X_normal_train)
t1 = time.time()
t_IsolationForest = t1 - t0
print("IsolationForest {:.3} s".format(t1 - t0))
t.append(t_IsolationForest)


y_pred_train = clf.predict(X_normal_train)
y_pred_valid = clf.predict(X_normal_valid)
y_pred_outliers = clf.predict(X_outliers)

# Error counts: a normal row predicted -1, or an abnormal row predicted 1.
n_error_train = y_pred_train[y_pred_train == -1].size
n_error_valid = y_pred_valid[y_pred_valid == -1].size
n_error_outliers = y_pred_outliers[y_pred_outliers == 1].size

n_error_train_rate = n_error_train / y_pred_train.size
n_error_valid_rate = n_error_valid / y_pred_valid.size
n_error_outliers_rate = n_error_outliers / y_pred_outliers.size

y_pred_val = clf.predict(X_val)

# Pin the label order so the matrix is always 2x2 with row/column 0 = -1 and
# row/column 1 = 1, even if one class is absent from the evaluation data;
# the metric code further down indexes cm_v[0][...] and cm_v[1][...].
cm_v = confusion_matrix(y_val_v1, y_pred_val, labels=[-1, 1])

# Compute the report and the F1 score once and reuse them.
report_v = classification_report(y_val_v1, y_pred_val)
f1_v = f1_score(y_val_v1, y_pred_val)

print(report_v)

f1score.append(f1_v)

print('f1_score:')
print(f1_v)
print('~~~~~~~~~')

cm_record.append(cm_v)
classification_report_record.append(report_v)

'''
Plot confusion matrix
'''
import itertools

# Create a confusion matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it on the current matplotlib axes.

    NOTE: this redefines the function of the same name declared earlier in
    the file; this later definition is the one in effect for the call below.

    Parameters
    ----------
    cm : 2-D array-like
        Matrix to display; rows are labelled 'True label' and columns
        'Predicted label'.
    classes : sequence
        Tick labels, used for both axes.
    normalize : bool, default False
        When True, every row is divided by its row sum before display.
        Rows whose sum is zero are shown as all zeros instead of NaN.
    title : str
        Axes title.
    cmap : matplotlib colormap
        Colormap passed to ``plt.imshow``.

    Returns
    -------
    None
        The matrix is printed and drawn; nothing is returned.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Guarded division: an empty row would otherwise be 0/0 -> NaN, which
        # also turns the colour threshold below into NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title, fontsize=14)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells above half of the maximum get white text, the others black text.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
# Draw the IsolationForest confusion matrix in the upper half of a new figure
# and save it to disk.
labels = ['Anomaly', 'Normal']  # alternative: ['Leakage', 'No leakage']
fig = plt.figure(figsize=(11.963, 9.3))
fig.add_subplot(2, 1, 1)
plot_confusion_matrix(
    cm_v,
    labels,
    title="Confusion Matrix---IsolationForest",
    cmap=plt.cm.Blues,
)
plt.savefig('IsolationForest-Exp-OS.png', dpi=600)

# Rates are read off cm_v using the same order as the plot labels:
# row 0 is the 'Anomaly' true class, row 1 the 'Normal' true class.
_anomaly_hit, _anomaly_miss = cm_v[0][0], cm_v[0][1]
_normal_flagged, _normal_kept = cm_v[1][0], cm_v[1][1]

# Fall-out / false positive rate: FPR = FP / (FP + TN)
FPR = _normal_flagged / (_normal_flagged + _normal_kept)
# True positive rate: TPR = TP / (TP + FN)
TPR = _anomaly_hit / (_anomaly_hit + _anomaly_miss)
# Accuracy: diagonal over the total of all four cells
ACC = (_anomaly_hit + _normal_kept) / (
    _anomaly_hit + _anomaly_miss + _normal_flagged + _normal_kept
)

# Store the three rates, then print each one under its heading.
for _record, _value in ((FPR_record, FPR), (TPR_record, TPR), (ACC_record, ACC)):
    _record.append(_value)

for _heading, _value in (('FPR:', FPR), ('TPR:', TPR), ('ACC:', ACC)):
    print(_heading)
    print(_value)

# Result is not stored or printed here.
f1_score(y_val_v1, y_pred_val)


# '''
# # LocalOutlierFactor
# '''
# from sklearn.neighbors import LocalOutlierFactor
# LOF = LocalOutlierFactor(novelty=True)

# print('LocalOutlierFactor:')

# t0 = time.time()
# LOF.fit(X_normal_train)
# t1 = time.time()
# t_LocalOutlierFactor = t1 - t0
# print("LocalOutlierFactor {:.3} s".format(t1 - t0))

# t.append(t_LocalOutlierFactor)

# y_pred_train = LOF.predict(X_normal_train)
# y_pred_valid = LOF.predict(X_normal_valid)
# y_pred_outliers = LOF.predict(X_outliers)

# n_error_train = y_pred_train[y_pred_train == -1].size
# n_error_valid = y_pred_valid[y_pred_valid == -1].size
# n_error_outliers = y_pred_outliers[y_pred_outliers == 1].size

# n_error_train_rate = y_pred_train[y_pred_train == -1].size / y_pred_train.size
# n_error_valid_rate = y_pred_valid[y_pred_valid == -1].size /y_pred_valid.size
# n_error_outliers_rate = y_pred_outliers[y_pred_outliers == 1].size /y_pred_outliers.size

# y_pred_val = LOF.predict(X_val)
# cm_v = confusion_matrix(y_val_v1, y_pred_val)
# print(classification_report(y_val_v1, y_pred_val))
# print('f1_score:')
# print(f1_score(y_val_v1, y_pred_val))
# print('~~~~~~~~~')
# f1score.append(f1_score(y_val_v1, y_pred_val))
# cm_record.append(cm_v)
# classification_report_record.append(classification_report(y_val_v1, y_pred_val))

# FPR = cm_v[1][0] / (cm_v[1][0] + cm_v[1][1])
# FPR_record.append(FPR)

# #TPR = TP / (TP + FN)
# TPR = cm_v[0][0] / (cm_v[0][0] + cm_v[0][1])
# ACC = (cm_v[0][0] + cm_v[1][1]) / (cm_v[0][0] + cm_v[0][1] + cm_v[1][0] + cm_v[1][1])

# FPR_record.append(FPR)
# TPR_record.append(TPR)
# ACC_record.append(ACC)

# print('FPR:')
# print(FPR)
# print('~~~~~~~~~')

# print('TPR:')
# print(TPR)
# print('~~~~~~~~~')

# print('ACC:')
# print(ACC)
# print('~~~~~~~~~')


# fig = plt.figure(figsize=(11.963,9.3))
# fig.add_subplot(212)
# plot_confusion_matrix(cm_v, labels, title="Confusion Matrix---LocalOutlierFactor", cmap=plt.cm.Reds)
# #plt.savefig('LocalOutlierFactor-Exp-OS.png',dpi=600)


# '''
# # Robust covariance
# sklearn.covariance.EllipticEnvelope assumes the data is Gaussian and learns an ellipse. 
# It thus degrades when the data is not unimodal. 
# Notice however that this estimator is robust to outliers.
# '''
# from sklearn.covariance import EllipticEnvelope
# RCE = EllipticEnvelope(contamination=0.17396393)

# t0 = time.time()
# RCE.fit(X_normal_train)
# t1 = time.time()
# t_RCE = t1 - t0
# print("Robust covariance {:.3} s".format(t1 - t0))

# t.append(t_RCE)

# y_pred_train = RCE.predict(X_normal_train)
# y_pred_valid = RCE.predict(X_normal_valid)
# y_pred_outliers = RCE.predict(X_outliers)

# n_error_train = y_pred_train[y_pred_train == -1].size
# n_error_valid = y_pred_valid[y_pred_valid == -1].size
# n_error_outliers = y_pred_outliers[y_pred_outliers == 1].size

# n_error_train_rate = y_pred_train[y_pred_train == -1].size / y_pred_train.size
# n_error_valid_rate = y_pred_valid[y_pred_valid == -1].size /y_pred_valid.size
# n_error_outliers_rate = y_pred_outliers[y_pred_outliers == 1].size /y_pred_outliers.size

# y_pred_val = RCE.predict(X_val)
# cm_v = confusion_matrix(y_val_v1, y_pred_val)
# print(classification_report(y_val_v1, y_pred_val))
# print('f1_score:')
# print(f1_score(y_val_v1, y_pred_val))
# print('~~~~~~~~~')
# f1score.append(f1_score(y_val_v1, y_pred_val))
# cm_record.append(cm_v)
# classification_report_record.append(classification_report(y_val_v1, y_pred_val))

# FPR = cm_v[1][0] / (cm_v[1][0] + cm_v[1][1])
# FPR_record.append(FPR)
# #TPR = TP / (TP + FN)
# TPR = cm_v[0][0] / (cm_v[0][0] + cm_v[0][1])
# ACC = (cm_v[0][0] + cm_v[1][1]) / (cm_v[0][0] + cm_v[0][1] + cm_v[1][0] + cm_v[1][1])

# FPR_record.append(FPR)
# TPR_record.append(TPR)
# ACC_record.append(ACC)

# print('FPR:')
# print(FPR)
# print('~~~~~~~~~')

# print('TPR:')
# print(TPR)
# print('~~~~~~~~~')

# print('ACC:')
# print(ACC)
# print('~~~~~~~~~')

# fig = plt.figure(figsize=(11.963,9.3))
# fig.add_subplot(222)
# plot_confusion_matrix(cm_v, labels, title="Confusion Matrix---RobustCovariance", cmap=plt.cm.Purples)
# plt.savefig('Robust covariance-Exp-OS.png',dpi=600)



# '''
# HELM
# '''
# import numpy as np
# import random

# import mapminmax as mmm
# import HELM

# normal_index  =  np.array( Y == 0 )
# abnormal_index = np.array( Y != 0 )
# X_normal = X[normal_index] # X
# X_outliers = X[abnormal_index] #
# y_normal = Y[normal_index]
# y_outliers = Y[abnormal_index]

# train_x, vali_x,  y_normal_train, y_normal_valid = train_test_split(X_normal, y_normal, test_size=0.30, random_state=24)
# test_x      = X_outliers #np.random.rand(2000,200)

# map_v = {0:1, 1:-1, 2:-1}
# y_val_v =  pd.concat([y_normal_valid, y_outliers], axis=0)
# y_val_v1 = y_val_v['Label'].map(map_v)
# y_all = Y['Label'].map(map_v)

# #standS = StandardScaler().fit(train_x)
# #standS = MinMaxScaler().fit(train_x)
# X_val_HELM =  np.concatenate([vali_x, test_x], axis=0)
# X_all = standS.fit_transform(X)

# # Hyperparameters of HELM
# para={}
# para['nhelm']         = 9  #5                 # number of times HELM is trained and ran
# ### Using more features then
# #para['neuron_number'] = np.array([36,100]) # HELM structure : 1 AE of size 20, one 1-class classifier of size 100
# para['neuron_number'] = np.array([3,96]) # 3...96  HELM structure : 1 AE of size 20, one 1-class classifier of size 100

# para['fista_weight']  = 1e-3               # weight for AE sparse regularization
# para['fista_cv']      = 1e-5               # Number of iterations (if >1) or max RMSE on iterates (if <1)
# para['ridge_weight']  = 1e-5               # weight for last layer regularization

# # Train HELM
# t0 = time.time()
# model=HELM.HELM(para,train_x)
# #X_reduced_tsne = TSNE(n_components=2, random_state=42).fit_transform(X.values)
# t1 = time.time()
# t_HELM = t1 - t0
# print("HELM {:.3} s".format(t1 - t0))
# t.append(t_HELM)

# # Run HELM on the three datasets
# out=HELM.HELM_run(model,train=train_x,val=vali_x,test= X_val_HELM)

# # Plot results
# # Define a detection threshold based on the quantile of the validation set
# quant = 99.4582;#99.9
# thr   = 0.20369 * np.percentile(out['val']['Y'],quant)

# import matplotlib.pyplot as plt
# f1 = plt.figure(figsize=(11.963,9.3))
# f1.clf()
# plt.plot(range(out['train']['Y'].size), out['train']['Y']/thr, linestyle='None', marker='*', markerfacecolor='none',color='b', label='Normal(Train)') # o
# plt.plot(range(out['train']['Y'].size, out['train']['Y'].size+ out['val']['Y'].size),   out['val']['Y']  /thr, linestyle='None', marker='*', markerfacecolor='none',color='g',label='Normal(Valid)')
# plt.plot(range(out['train']['Y'].size+ out['val']['Y'].size, out['train']['Y'].size+ out['val']['Y'].size + out['val']['Y'].size), out['val']['Y'] /thr, linestyle='None', marker='o', markerfacecolor='none',color='c',label='Normal(Test)')
# plt.plot(range(out['train']['Y'].size+ out['val']['Y'].size + out['val']['Y'].size, out['train']['Y'].size+ out['val']['Y'].size + out['test']['Y'].size), (out['test']['Y'] /thr)[(len(out['val']['Y'] /thr)):], linestyle='None', marker='o', markerfacecolor='none',color='r',label='Abnormal(Test)')


# plt.plot([0, out['train']['Y'].size + out['val']['Y'].size + out['test']['Y'].size],[1,1],color='k')
# plt.ylim((0,2))

# plt.xticks(fontsize= 16)
# plt.yticks(fontsize= 16)

# plt.legend(loc='best',fontsize=16);
# plt.tight_layout()
# plt.xlabel('Data Samples',fontsize=16)
# plt.ylabel('|1-Y_test| / Thrd',fontsize=16)
# #plt.savefig('HELM-Exp-OS1.png',dpi=600)
# y_pred_val = 2 * (out['test']['Y']/thr <=1) - 1

# cm_v = confusion_matrix(y_val_v1, y_pred_val)
# print(classification_report(y_val_v1, y_pred_val))
# print('f1_score:')
# print(f1_score(y_val_v1, y_pred_val))
# print('~~~~~~~~~')
# f1score.append(f1_score(y_val_v1, y_pred_val))
# cm_record.append(cm_v)
# classification_report_record.append(classification_report(y_val_v1, y_pred_val))

# FPR = cm_v[1][0] / (cm_v[1][0] + cm_v[1][1])
# FPR_record.append(FPR)
# #TPR = TP / (TP + FN)
# TPR = cm_v[0][0] / (cm_v[0][0] + cm_v[0][1])
# ACC = (cm_v[0][0] + cm_v[1][1]) / (cm_v[0][0] + cm_v[0][1] + cm_v[1][0] + cm_v[1][1])

# FPR_record.append(FPR)
# TPR_record.append(TPR)
# ACC_record.append(ACC)

# print('FPR:')
# print(FPR)
# print('~~~~~~~~~')

# print('TPR:')
# print(TPR)
# print('~~~~~~~~~')

# print('ACC:')
# print(ACC)
# print('~~~~~~~~~')

# fig = plt.figure(figsize=(11.963,9.3))
# fig.add_subplot(222)
# plot_confusion_matrix(cm_v, labels, title="Confusion Matrix---HELM", cmap=plt.cm.YlGn)
# #plt.tight_layout()
# #plt.savefig('HELM-Exp-OS2.png',dpi=600)

# f2 = plt.figure(figsize=(11.963,9.3)) #fig = plt.figure(figsize=(11.963,9.3))

# f2.clf()
# plt.plot(range(out['train']['Y'].size),      out['train']['Y']/thr, linestyle='None', marker='o', markerfacecolor='none',color='b', label='Normal(Train)')
# plt.plot(range(out['train']['Y'].size,out['train']['Y'].size+ out['val']['Y'].size),   out['val']['Y']  /thr, linestyle='None', marker='o', markerfacecolor='none',color='g',label='Normal(Valid)')

# weak_leakage = (y_val_v == 1)
# severe_leakage = (y_val_v == 2)
# weak_plot = out['test']['Y'][weak_leakage]
# severe_plot = out['test']['Y'][severe_leakage]

# plt.plot(range(out['train']['Y'].size+ out['val']['Y'].size, out['train']['Y'].size+ out['val']['Y'].size + weak_plot.size), weak_plot /thr, linestyle='None', marker='o', markerfacecolor='none',color='hotpink', label='Weak Abnormal')
# plt.plot(range(out['train']['Y'].size+ out['val']['Y'].size + weak_plot.size, out['train']['Y'].size+ out['val']['Y'].size + weak_plot.size + severe_plot.size), severe_plot /thr, linestyle='None', marker='o', markerfacecolor='none',color='r',label='Severe Abnormal')

# plt.plot([0, out['train']['Y'].size + out['val']['Y'].size + out['test']['Y'].size],[1,1],color='k')
# plt.ylim((0,2))

# plt.xticks(fontsize= 16)
# plt.yticks(fontsize= 16)

# plt.legend(loc='best',fontsize=16);
# plt.tight_layout()
# plt.xlabel('Data Samples',fontsize=16)
# plt.ylabel('|1-Y_test| / τ',fontsize=16)
# #plt.savefig('HELM-Exp-OS3.png',dpi=600)