# -*- coding: utf-8 -*-
"""
Created on Wed Mar 22 13:37:10 2023

@author: necjgerzinic
"""

import pandas as pd
import numpy as np
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, log, PanelLikelihoodTrajectory
import biogeme.models as models

# Read the comma-separated choice data into a data frame
df = pd.read_csv(
    "formatted_dataset.csv",
    sep=',',
    engine='python',
)

# Wrap the data in a biogeme database called 'UrbanFlex' and publish its
# variables as module-level names, so that the model specification below can
# refer to them directly
database = db.Database('UrbanFlex', df)
globals().update(database.variables)
# Declare the data as panel data, using the 'Respondent' column
database.panel('Respondent')
  
# Estimation status of every parameter; each entry is handed to biogeme's
# Beta as its last argument: 0 = the parameter is estimated, 1 = the parameter
# is kept fixed at the corresponding entry of `values`.
# Layout: p parameters for each of the four classes, followed by the four
# class-membership constants.
estimate = [
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  # class 1
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  # class 2
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  # class 3
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  # class 4
    1, 0, 0, 0,                          # class membership
]
# Values of the fixed parameters, in the same layout as `estimate`. Entries
# that belong to estimated parameters are ignored: those parameters start from
# the random draws generated below.
values = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  # class 1
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  # class 2
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  # class 3
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  # class 4
    0, 0, 0, 0,                          # class membership
]
# Number of starting sets
R = 10
# Number of parameters per class
p = 12

# The two lists are edited by hand; fail early if they got out of sync
if len(values) != len(estimate):
    raise ValueError(
        'estimate and values must have the same length, got '
        + str(len(estimate)) + ' and ' + str(len(values))
    )

# Set seed, so that the starting sets are reproducible
np.random.seed(0)
# One row of starting values per starting set, drawn uniformly from [-1, 1)
startset = np.random.uniform(-1, 1, (R, len(estimate)))
# Overwrite the random draws of the fixed parameters (status 1) with their
# fixed values, in every starting set
fixed_idx = np.flatnonzero(np.asarray(estimate) == 1)
startset[:, fixed_idx] = np.asarray(values, dtype=float)[fixed_idx]
   
# Containers for the outcomes of all the models: two dictionaries (one for the
# parameter estimates, one for the statistics) and the list of final LL values
beta_set, stat_set = dict(), dict()
log_list = list()

# Base names of the p class-specific parameters, in the order in which they
# are stored (per class) in `startset` and `estimate`. The class number is
# appended to obtain the biogeme parameter name, e.g. 'B_COST_3'.
_CLASS_PARAMS = (
    'ASC_BIKE', 'ASC_CAR', 'ASC_PT', 'ASC_MOD',
    'B_TIME_B', 'B_TIME_M', 'B_COST', 'B_ACCESS',
    'B_WAIT_PT', 'B_WAIT_MOD', 'B_SHARE', 'B_LEISURE',
)
# Number of latent classes
_N_CLASSES = 4


def _class_utilities(k, start):
    """Build the utility functions of latent class ``k``.

    :param k: 1-based class number. It is used as the suffix of the parameter
        names and selects the k-th slice of ``p`` entries in ``start`` and
        ``estimate``.
    :param start: starting values of all parameters for the current run
        (one row of ``startset``).
    :return: dict mapping the alternative ids 1 to 5 (bike, PT, car, MOD1,
        MOD2) to their utility expressions.
    """
    offset = (k - 1) * p
    b = {
        name: Beta(f'{name}_{k}', start[offset + j], None, None,
                   estimate[offset + j])
        for j, name in enumerate(_CLASS_PARAMS)
    }

    # Cost coefficient, shifted by B_LEISURE * LEISURE; the same expression
    # applies to every alternative that has a cost attribute
    cost = b['B_COST'] + b['B_LEISURE'] * LEISURE

    return {
        1: b['ASC_BIKE'] + b['B_TIME_B'] * BIKE_TIME,
        2: (b['ASC_PT'] + b['B_TIME_M'] * PT_TIME + cost * PT_COST
            + b['B_ACCESS'] * PT_ACCESS + b['B_WAIT_PT'] * PT_WAIT),
        3: (b['ASC_CAR'] + b['B_TIME_M'] * CAR_TIME + cost * CAR_COST
            + b['B_ACCESS'] * CAR_ACCESS),
        4: (b['ASC_MOD'] + b['B_TIME_M'] * MOD1_TIME + cost * MOD1_COST
            + b['B_ACCESS'] * MOD1_ACCESS + b['B_WAIT_MOD'] * MOD1_WAIT
            + b['B_SHARE'] * MOD1_SHARE),
        5: (b['ASC_MOD'] + b['B_TIME_M'] * MOD2_TIME + cost * MOD2_COST
            + b['B_ACCESS'] * MOD2_ACCESS + b['B_WAIT_MOD'] * MOD2_WAIT
            + b['B_SHARE'] * MOD2_SHARE),
    }


# Estimate the same 4-class latent class model once per starting set
for r in range(R):
    start = startset[r]

    # Class-specific choice models
    V = {k: _class_utilities(k, start) for k in range(1, _N_CLASSES + 1)}
    av = {1: BIKE_AV, 2: PT_AV, 3: CAR_AV, 4: MOD1_AV, 5: MOD2_AV}

    # Class membership model: one constant per class, stored after the
    # class-specific parameters in `startset` / `estimate`
    W = {}
    for k in range(1, _N_CLASSES + 1):
        idx = _N_CLASSES * p + k - 1
        W[k] = Beta(f'CLASS_{k}', start[idx], None, None, estimate[idx])

    # The choice model is a discrete mixture of logit, with availability
    # conditions. For each class, the probability of belonging to the class
    # is multiplied by the likelihood of the whole sequence of choices of the
    # individual conditional on that class; the products are summed over the
    # classes to obtain the likelihood for the individual.
    probIndiv = None
    for k in range(1, _N_CLASSES + 1):
        probClass = models.logit(W, None, k)
        probTraj = PanelLikelihoodTrajectory(models.logit(V[k], av, Choice))
        term = probClass * probTraj
        probIndiv = term if probIndiv is None else probIndiv + term

    # Contribution of the individual to the log likelihood
    logprob = log(probIndiv)

    biogeme = bio.BIOGEME(database, logprob)
    biogeme.modelName = 'LC4_context_i' + str(r+1)
    results = biogeme.estimate()

    # Store the outcomes of this run. The final log likelihood is looked up by
    # name: the general statistics contain optional entries, so its position
    # in the table is not guaranteed to be the same for every run.
    beta_set[r] = results.getEstimatedParameters()
    general_stats = results.getGeneralStatistics()
    stat_set[r] = pd.DataFrame(data=general_stats).transpose()
    log_list.append(general_stats['Final log likelihood'][0])
    
# Find the starting set with the highest final LL (the first one in case of
# ties) and keep its estimates and statistics. The index is deliberately not
# called `set`, which would shadow the builtin for the rest of the session.
best_run = log_list.index(max(log_list))
betas = beta_set[best_run]
stats = stat_set[best_run]











