"""
Created on Wed Mar 22 08:31:45 2023

@author: necjgerzinic
"""

import pandas as pd
import numpy as np
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, log, PanelLikelihoodTrajectory
import biogeme.models as models
import biogeme.messaging as msg

# Load the choice observations from a comma-separated file in the working
# directory.
df = pd.read_csv("formatted_dataset.csv",sep=',', engine='python')

# Wrap the DataFrame in a Biogeme database called 'LongDistance'.
database = db.Database('LongDistance',df)
# Publish the database variables as module-level names, so that the model
# expressions further down can refer to them by bare name.
globals().update(database.variables)
# Declare the data as panel data, grouping the observations by the 'ID' column.
database.panel('ID')

# Seed NumPy's global generator so that the start values are reproducible
np.random.seed(0)

# Dimensions of the multi-start search
n_class = 4        # number of classes
taste_param = 20   # columns reserved per class for taste parameters
class_param = 40   # columns reserved per class for class allocation parameters
sets = 10          # number of start-value sets

# Taste parameters to be fixed (not estimated): one entry per class, each
# holding the column indices to fix
TFP = ()
# Class allocation parameters to be fixed (not estimated): one list of column
# indices per row of est_class_param
CFP = (
    [3, 4, 5, 11, 12, 13, 14, 22, 23, 24, 25, 26, 27, 28, 29, 32, 34],
    [3, 4, 5, 11, 12, 13, 14, 22, 23, 24, 25, 26, 27, 28, 29, 32, 34],
    [3, 4, 5, 11, 12, 13, 14, 22, 23, 24, 25, 26, 27, 28, 29, 32, 34],
)

# Random start values for the taste parameters, drawn from [-1, 0).
# estimate_param flags the fixed taste parameters with 1; all others stay 0.
taste_shape = (sets, n_class, taste_param)
taste_set = np.random.uniform(-1, 0, size=taste_shape)
estimate_param = np.zeros(taste_shape[1:])

for class_idx, fixed_cols in enumerate(TFP):
    # A fixed taste parameter is held at 0 in every start set
    taste_set[:, class_idx, fixed_cols] = 0
    estimate_param[class_idx, fixed_cols] = 1

# Random start values for the class allocation parameters. Only column 0
# keeps its random draw; every other column starts at 0.
class_shape = (sets, n_class, class_param)
class_set = np.random.uniform(-1, 0, size=class_shape)
class_set[..., 1:] = 0

# est_class_param flags the fixed class allocation parameters with 1
est_class_param = np.zeros((n_class - 1, class_param))
for class_idx, fixed_cols in enumerate(CFP):
    est_class_param[class_idx, fixed_cols] = 1


# Containers for the outcome of every estimated start set. param_set and
# stat_set are keyed by the start-set index s; log_list collects the final
# log likelihoods in the order in which the sets are estimated.
param_set = {}
stat_set = {}
log_list = []


def _taste_betas(s, label, col, lower=None, upper=None):
    """Return one taste parameter per class for start set ``s``.

    The parameter of class ``c`` is named ``C{c}_{label}``. Its start value
    is ``taste_set[s, c, col]`` and its status flag (1 = fixed, 0 =
    estimated) is ``estimate_param[c, col]``. ``lower`` and ``upper`` are
    handed to ``Beta`` as bounds (``None`` = unbounded).
    """
    return [
        Beta(
            f'C{c}_{label}',
            float(taste_set[s, c, col]),
            lower,
            upper,
            # Beta expects an integer status flag, not a NumPy float
            int(estimate_param[c, col]),
        )
        for c in range(n_class)
    ]


def _class_betas(s, label, col):
    """Return one class allocation parameter per non-reference class.

    The parameter of class ``c`` is named ``D{c}_{label}``. Its start value
    is ``class_set[s, c, col]`` and its status flag (1 = fixed, 0 =
    estimated) is ``est_class_param[c, col]``. The parameters are unbounded.
    """
    return [
        Beta(
            f'D{c}_{label}',
            float(class_set[s, c, col]),
            None,
            None,
            # Beta expects an integer status flag, not a NumPy float
            int(est_class_param[c, col]),
        )
        for c in range(n_class - 1)
    ]


# Define level of verbosity (once; it does not depend on the start set)
logger = msg.bioMessage()
logger.setGeneral()

# NOTE(review): the loop starts at start set 3, so sets 0-2 are not estimated
# in this run and have no entry in param_set / stat_set / log_list.
for s in range(3, sets):

    # Alternative-specific constants (alternative 1 has none)
    ASC_TRAIN = _taste_betas(s, 'ASC_TRAIN', 0)
    ASC_AIR   = _taste_betas(s, 'ASC_AIR', 1)
    # Generic parameters
    # NOTE(review): the start values are drawn from [-1, 0), which lies
    # outside the [0, inf) bounds of B_COMF and B_COMF_INT -- confirm how the
    # optimizer treats these infeasible starting points.
    B_COST     = _taste_betas(s, 'B_COST', 5)
    B_TIME     = _taste_betas(s, 'B_TIME', 6)
    B_COMF     = _taste_betas(s, 'B_COMF', 7, lower=0)
    B_COMF_INT = _taste_betas(s, 'B_COMF_INT', 8, lower=0)
    # Mode-specific parameters
    B_RISK_TRAIN     = _taste_betas(s, 'B_RISK_TRAIN', 10, upper=0)
    B_RISK_TRAIN_INT = _taste_betas(s, 'B_RISK_TRAIN_INT', 11, upper=0)
    B_RISK_AIR       = _taste_betas(s, 'B_RISK_AIR', 12, upper=0)
    B_RISK_AIR_INT   = _taste_betas(s, 'B_RISK_AIR_INT', 13, upper=0)

    # Specify IVT: the time coefficient of train and air is shifted by the
    # *_cf and *_pr attributes through the interaction parameters
    B_TIME_CAR   = [B_TIME[c]                                                              for c in range(n_class)]
    B_TIME_TRAIN = [B_TIME[c] + B_COMF_INT[c] * train_cf + B_RISK_TRAIN_INT[c] * train_pr  for c in range(n_class)]
    B_TIME_AIR   = [B_TIME[c] + B_COMF_INT[c] * plane_cf + B_RISK_AIR_INT[c]   * plane_pr  for c in range(n_class)]

    # Class-specific utility functions of the three alternatives
    V1 = [               B_COST[c] * tc_car/100   + B_TIME_CAR[c]   * tt_car                                                        for c in range(n_class)]
    V2 = [ASC_TRAIN[c] + B_COST[c] * tc_train/100 + B_TIME_TRAIN[c] * tt_train + B_COMF[c] * train_cf + B_RISK_TRAIN[c] * train_pr  for c in range(n_class)]
    V3 = [ASC_AIR[c]   + B_COST[c] * tc_plane/100 + B_TIME_AIR[c]   * tt_plane + B_COMF[c] * plane_cf + B_RISK_AIR[c]   * plane_pr  for c in range(n_class)]

    V  = [{1: V1[c], 2: V2[c], 3: V3[c]}    for c in range(n_class)]
    av = {1: 1,  2: 1,  3: 1}

    # The choice model is a discrete mixture of logit, with availability conditions
    # We calculate the conditional probability for each class
    prob = [PanelLikelihoodTrajectory(models.logit(V[c], av, Choice)) for c in range(n_class)]

    # Constant
    D_CLASS = _class_betas(s, 'CLASS', 0)

    # Socio-demographics
    D_CLASS_AGE    = _class_betas(s, 'CLASS_AGE', 1)
    D_CLASS_GENDER = _class_betas(s, 'CLASS_GENDER', 2)
    D_CLASS_INC    = _class_betas(s, 'CLASS_INC', 3)
    D_CLASS_INC_no = _class_betas(s, 'CLASS_INC_no', 4)
    D_CLASS_EDU    = _class_betas(s, 'CLASS_EDU', 5)
    D_CLASS_CAR    = _class_betas(s, 'CLASS_CAR', 6)

    # NS attributes
    D_CLASS_FIRST    = _class_betas(s, 'CLASS_FIRST', 11)
    D_CLASS_DAL_VOOR = _class_betas(s, 'CLASS_DAL_VOOR', 12)
    D_CLASS_DISCOUNT = _class_betas(s, 'CLASS_DISCOUNT', 13)
    D_CLASS_STATION  = _class_betas(s, 'CLASS_STATION', 14)

    # Travel related attributes
    D_CLASS_FREQ      = _class_betas(s, 'CLASS_FREQ', 21)
    D_CLASS_WORK      = _class_betas(s, 'CLASS_WORK', 22)
    D_CLASS_VFF       = _class_betas(s, 'CLASS_VFF', 23)
    D_CLASS_PURP_OTH  = _class_betas(s, 'CLASS_PURP_OTH', 24)
    D_CLASS_OTHER_PAY = _class_betas(s, 'CLASS_OTHER_PAY', 25)
    D_CLASS_PARTNER   = _class_betas(s, 'CLASS_PARTNER', 26)
    D_CLASS_FAMILY    = _class_betas(s, 'CLASS_FAMILY', 27)
    D_CLASS_W_OTHERS  = _class_betas(s, 'CLASS_W_OTHERS', 28)
    D_CLASS_OMICRON   = _class_betas(s, 'CLASS_OMICRON', 29)
    # Mode preferences
    D_CLASS_S_CAR   = _class_betas(s, 'CLASS_S_CAR', 31)
    D_CLASS_S_TRAIN = _class_betas(s, 'CLASS_S_TRAIN', 32)
    D_CLASS_S_AIR   = _class_betas(s, 'CLASS_S_AIR', 33)
    D_CLASS_L_CAR   = _class_betas(s, 'CLASS_L_CAR', 34)
    D_CLASS_L_TRAIN = _class_betas(s, 'CLASS_L_TRAIN', 35)
    D_CLASS_L_AIR   = _class_betas(s, 'CLASS_L_AIR', 36)

    # The last class is the reference class: its membership utility is 0
    W_last = 0

    # Class-membership utility of every non-reference class
    W = [(D_CLASS[c] +
           # Socio-demographics
           D_CLASS_AGE[c]       *  Age +
           D_CLASS_GENDER[c]    *  Gender +
           D_CLASS_INC[c]       *  HH_income * (HH_income < 8) +
           D_CLASS_INC_no[c]    * (HH_income == 8) +
           D_CLASS_EDU[c]       *  Education +
           D_CLASS_CAR[c]       * (Car_AV == 0) +
           # NS attributes
           D_CLASS_FIRST[c]     *  Travel_class +
           D_CLASS_DAL_VOOR[c]  * (Travel_discount == 1) +
           D_CLASS_DISCOUNT[c]  * (Travel_discount > 1) +
           D_CLASS_STATION[c]   *  Station_type +
           # Travel attributes
           D_CLASS_FREQ[c]      *  Frequency +
           D_CLASS_WORK[c]      * (Purpose > 1) * (Purpose < 4) +
           D_CLASS_VFF[c]       * (Purpose == 1) +
           D_CLASS_PURP_OTH[c]  * (Purpose > 3) +
           D_CLASS_OTHER_PAY[c] * (Payment > 0) +
           D_CLASS_PARTNER[c]   * (Travel_party == 1) +
           D_CLASS_FAMILY[c]    * (Travel_party > 1) * (Travel_party < 4) +
           D_CLASS_W_OTHERS[c]  * (Travel_party > 3) +
           D_CLASS_OMICRON[c]   *  Omicron +
           # Mode preferences
           D_CLASS_S_CAR[c]    * (Prefer_short == 0) +
           D_CLASS_S_TRAIN[c]  * (Prefer_short == 2) +
           D_CLASS_S_AIR[c]    * (Prefer_short == 3) +
           D_CLASS_L_CAR[c]    * (Prefer_long == 0) +
           D_CLASS_L_TRAIN[c]  * (Prefer_long == 2) +
           D_CLASS_L_AIR[c]    * (Prefer_long == 3)
          ) for c in range(n_class-1)]

    # Class-membership probabilities: a logit over the membership utilities.
    # The utility dictionary is built once (keys 1..n_class) and follows
    # n_class instead of being spelled out for exactly four classes.
    class_utilities = {c + 1: W[c] for c in range(n_class - 1)}
    class_utilities[n_class] = W_last
    probClass = [models.logit(class_utilities, None, c + 1) for c in range(n_class)]

    # Probability of an individual's whole sequence of choices: the
    # class-conditional probabilities weighted by the membership probabilities
    probIndiv = prob[0] * probClass[0]
    for c in range(1, n_class):
        probIndiv = probIndiv + prob[c] * probClass[c]

    # Contribution of the individual to the log likelihood
    logprob = log(probIndiv)

    biogeme  = bio.BIOGEME(database,logprob,numberOfThreads=10)
    biogeme.modelName = f'final_model_set_{s}'
    biogeme.generatePickle = False
    biogeme.saveIterations = False
    biogeme.generateHtml = True
    results = biogeme.estimate()

    # Query the results once and reuse them for all three containers
    general_stats = pd.DataFrame(data=results.getGeneralStatistics())
    stats = general_stats.transpose()
    parameters = results.getEstimatedParameters()
    param_set[s] = parameters
    stat_set[s]  = stats
    log_list.append(general_stats.loc[0, 'Final log likelihood'])


# Find the start set with the highest final LL.
# log_list is positional (one entry per estimated set, in estimation order),
# whereas param_set and stat_set are keyed by the start-set index s. The
# estimation loop does not start at s = 0, so the position of the best value
# has to be mapped back to its key before it is used for the lookup.
estimated_sets = list(param_set)   # keys in insertion (= estimation) order
best_set   = estimated_sets[log_list.index(max(log_list))]
parameters = param_set[best_set]
stats      = stat_set[best_set]
 

    
    
    
    