import pandas as pd
import geopandas as gpd
import re
import numpy as np
import matplotlib.pyplot as plt
# import feather
import plotly.express as px
import libpysal
from libpysal.weights import Queen, Rook, KNN, DistanceBand
import esda
# import contextily as ctx
import shapely
import shapely.geometry as geom
import mapclassify as mc
from matplotlib import colors
# Silence pandas' chained-assignment warning for the whole session.
pd.options.mode.chained_assignment = None  # default='warn'

# `get_ipython` is only defined when running under IPython/Jupyter
# (presumably this file was exported from a notebook). Guard the call so the
# module can also be imported from a plain Python interpreter.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # Not running under IPython: there is no inline backend to enable.
    pass

# ----------------------------------- DATA PREP ----------------------------------------

def dfAssignKeyword(afAll, matTypes):
    '''
    Assign one material category to every record and return a cleaned frame.

    inputs:
    afAll (dataframe / geodataframe) - needs columns matDesc (free-text
        description, must be strings), kg and geometry
    matTypes (dictionary) - {categoryName: keywords}; a word of the
        description belongs to a category when `word in keywords` is true

    returns:
    new frame with columns mat, kg, geometry and logKg, without the rows whose
    material is hazardous-sludge, fuel, chemical or uncategorized

    side effect (kept for existing callers): the columns 'keyword' (chosen
    category) and 'nKeywords' (number of categories left after the
    adjustments) are written onto the afAll object that was passed in.
    '''
    # categories that win outright when they appear together with others
    priorityCats = ('hazardous / sludge', 'fertilizer', 'machinery / electronics')
    mineralCats = frozenset([
        'concrete / stone', 'earth / sand / clay', 'plaster', 'asphalt',
        'mixed construction materials',
    ])
    mixedOnly = ('mixed construction materials',)
    # descriptions that, as a whole, always mean mixed construction materials
    wholeDescOverrides = frozenset(['mixed construction wastes', 'waste from waste treatment'])
    droppedMats = ['hazardous-sludge', 'fuel', 'chemical', 'uncategorized']

    def assignMatType(desc):
        # Collect every category matched by a word of the description.
        # A dict is used as an ordered set so that the categories keep the
        # order in which they were first matched; deduplicating through a
        # plain set made the later "take the first keyword" step depend on
        # string hashing and therefore differ between interpreter runs.
        found = {}
        for word in desc.lower().split():
            word = word.strip('.,;')
            for cat, keywords in matTypes.items():
                if word in keywords:
                    found[cat] = None
        return tuple(found) if found else ('uncategorized',)

    def resolveKeywords(desc, cats):
        # whole-description override beats everything else
        if desc.lower() in wholeDescOverrides:
            return mixedOnly
        if len(cats) > 1:
            # prioritize hazardous, fertilizer, and machinery (in that order)
            for cat in priorityCats:
                if cat in cats:
                    return (cat,)
            # several mineral categories together count as mixed construction
            if all(cat in mineralCats for cat in cats):
                return mixedOnly
        return cats

    resolved = [resolveKeywords(desc, assignMatType(desc)) for desc in afAll.matDesc]

    # written positionally, so the index of afAll does not matter
    afAll['keyword'] = [cats[0] for cats in resolved]
    afAll['nKeywords'] = [len(cats) for cats in resolved]

    # build the returned frame separately from the input
    result = afAll[['keyword', 'kg', 'geometry']].rename(columns={'keyword': 'mat'})
    # log of the weight; non-positive weights are mapped to 0 instead of -inf / nan
    result['logKg'] = result.kg.map(lambda kg: np.log(kg) if kg > 0 else 0)
    result['mat'] = result.mat.map(lambda mat: mat.replace(' / ', '-').replace(' ', '-'))

    return result[~result.mat.isin(droppedMats)]

def makeNlGrid(cell_size, pc2):
    '''
    Build a square grid clipped to the postcode polygons, plus its Queen weights.

    cell_size: [float] representing size of cells in meters
    pc2: [geodataframe] of 2-digit postcodes

    returns (nlGrid, wq):
    nlGrid - geodataframe with columns index (0..n-1) and geometry, holding
        only the grid cells that intersect at least one polygon of pc2
    wq - Queen contiguity weights of nlGrid with transform set to 'r'
    '''
    # fixed bounding box the grid is laid over (coordinates in EPSG:28992)
    west, south, east, north = np.array([13565.597251204861, 306846.0765220298, 278026.05770926, 615045.3397637472])
    xStarts = np.arange(west, east + cell_size, cell_size)
    yStarts = np.arange(south, north + cell_size, cell_size)

    # one square per (x0, y0): it reaches from x0 - cell_size to x0
    # horizontally and from y0 to y0 + cell_size vertically
    squares = [
        shapely.geometry.box(x0, y0, x0 - cell_size, y0 + cell_size)
        for x0 in xStarts
        for y0 in yStarts
    ]
    cell = gpd.GeoDataFrame(squares, columns=['geometry'], crs="EPSG:28992")

    # keep only the squares touched by a postcode polygon
    # NOTE(review): newer geopandas releases appear to spell this keyword
    # `predicate` instead of `op` - confirm against the installed version
    joined = gpd.sjoin(pc2, cell, how='left', op='intersects')
    keptCells = joined.index_right.unique()
    nlGrid = cell.loc[keptCells].reset_index(drop=True).reset_index()

    # spatial weights matrix for the clipped grid
    wq = Queen(nlGrid.geometry)
    wq.transform = 'r'

    return nlGrid, wq

def ptToGrid(df, nlGrid):
    '''
    Sum the kg of the points that fall into each grid cell.

    input
    df (geodataframe) - geodataframe with columns kg and geometry (point data)
    nlGrid (geodataframe) - grid of NL with columns index and geometry

    returns
    geodataframe indexed by the grid's 'index' values, with columns geometry
    (taken from the grid side of the join) and kg (sum per cell)
    '''
    # Only kg and the point geometry are needed. Dropping the other columns
    # first keeps the "sum" aggregation below away from attributes that
    # cannot be summed (e.g. text labels) and from columns that would be
    # summed only to be thrown away.
    points = df[['kg', df.geometry.name]]

    # right join: every grid cell is kept, also the ones without any point
    pointsInCells = gpd.sjoin(points, nlGrid, how='right')
    cellTotals = pointsInCells.dissolve(by="index", aggfunc="sum")

    return cellTotals[['geometry', 'kg']]



# ------------------------------- GLOBAL MORAN's I -------------------------------------

def makeMoransStatsDf(matNlGrids, nlGridsWeights):
    '''
    Compute global Moran's I for every material and cell size.

    inputs:
    matNlGrids (dictionary) - dictionary of geodataframes with structure:
        {'materialName': {cellsize : gdf}}; each gdf needs columns logKg and kg
    nlGridsWeights (dictionary) - dictionary of spatial weights for grids with structure:
        {cellsize: spatialWeightMatrix}
    important note: cell sizes should match in matNlGrids and nlGridsWeights

    returns:
    miStatsDf (dataframe) - one row per (material, cell size) with columns
        cell_size, p-value, mi_obs, mi_sim, kgTot, percZero and mat
    '''
    perMaterial = []

    for mat, grids in matNlGrids.items():
        stats = {}
        for cellSize, matGrid in grids.items():
            y = matGrid.logKg
            wq = nlGridsWeights[cellSize]

            # reseed before every run so the simulated statistics are repeatable
            np.random.seed(12345)
            mi = esda.moran.Moran(y, wq)

            nZero = len(matGrid[matGrid.kg == 0])
            stats[cellSize] = {
                'p-value': mi.p_sim,
                'mi_obs': mi.I,
                'mi_sim': mi.EI_sim,
                'kgTot': matGrid.kg.sum(),
                # share of cells without any material, in percent
                'percZero': round(nZero / len(matGrid) * 100, 1),
            }

        # rows = cell sizes, columns = statistics
        matDf = pd.DataFrame(stats).T
        matDf['mat'] = mat
        perMaterial.append(matDf)

    # DataFrame.append no longer exists in pandas 2.x; concatenate once instead
    miStatsDf = pd.concat(perMaterial) if perMaterial else pd.DataFrame({})

    # the cell sizes sit in the index: move them into a regular column
    miStatsDf = miStatsDf.reset_index().rename(columns={'index': 'cell_size'})

    return miStatsDf


# ------------------------------- SPATIAL CORRELOGRAM ---------------------------------------

def spatialCorrelogramDf():
    '''
    Placeholder - not implemented yet, always returns None.

    Intended result: a dataframe with columns material, cell size, moran's I,
    and p-value, from which a spatial correlogram can be plotted.
    '''
    return None



# ------------------------------------- HCDD ---------------------------------------------

def calcLocalMorans(gdf, wq):
    '''
    Label every cell as hot spot / cold spot / doughnut / diamond using local Moran's I.

    gdf (geodataframe) - with columns logKg, geometry
    wq (weight matrix) - queen weight matrix for specific cell size
    note: cell sizes for gdf and wq should match.

    returns a copy of gdf with two extra columns:
    cl - '0 ns' for cells whose p_sim is not below 0.05, otherwise the label
        belonging to the cell's quadrant code (1 hot spot, 2 doughnut,
        3 cold spot, 4 diamond)
    colors - plotting color that goes with the label in cl
    '''
    # position in this list == spot code
    spotLabels = ['0 ns', '1 hot spot', '2 doughnut', '3 cold spot', '4 diamond']
    spotColors = {
        '0 ns': 'lightgrey',
        '1 hot spot': 'red',
        '2 doughnut': 'lightblue',
        '3 cold spot': 'blue',
        '4 diamond': 'pink'
    }

    # local Moran's I on the log-transformed weights
    li = esda.moran.Moran_Local(gdf.logKg, wq)

    # spot code: the quadrant (li.q) for significant cells, 0 for the rest
    significant = li.p_sim < 0.05
    spotCodes = np.where(significant, li.q, 0)

    labels = [spotLabels[code] for code in spotCodes]
    spotColorList = [spotColors[label] for label in labels]

    # assign returns a new frame, the gdf passed in is left untouched
    return gdf.assign(cl=labels, colors=spotColorList)


# ------------------------------------- Spatial correlogram in hotspot areas ---------------------------------------------
def makeHotspotGrid(cell_size, hsArea):
    '''
    Build a square grid clipped to the hotspot areas, plus its Queen weights.

    cell_size: [float] representing size of cells in meters
    hsArea: [GeoDataFrame] of hotspot areas (per the original note: areas
        larger than 900 km2); its CRS is set to EPSG:28992 for the join

    returns (nlGrid, wq):
    nlGrid - geodataframe with columns index (0..n-1) and geometry, holding
        only the grid cells that intersect at least one hotspot area
    wq - Queen contiguity weights of nlGrid with transform set to 'r'
    '''
    # cascaded_union is gone from Shapely 2.x; unary_union is its documented
    # replacement and is also available in the 1.x series
    from shapely.ops import unary_union

    crs = "EPSG:28992"

    # bounding box of all hotspot areas taken together
    xmin, ymin, xmax, ymax = unary_union(list(hsArea.geometry)).bounds

    # create square grid over the bounding box according to cell size;
    # each square reaches from x0 - cell_size to x0 horizontally and from
    # y0 to y0 + cell_size vertically
    grid_cells = [
        shapely.geometry.box(x0, y0, x0 - cell_size, y0 + cell_size)
        for x0 in np.arange(xmin, xmax + cell_size, cell_size)
        for y0 in np.arange(ymin, ymax + cell_size, cell_size)
    ]
    cell = gpd.GeoDataFrame(grid_cells, columns=['geometry'], crs=crs)

    # cut square grid into the shape of the hotspot areas
    # NOTE(review): newer geopandas releases appear to spell the `op` keyword
    # `predicate` - confirm against the installed version
    hsArea = hsArea.set_crs(crs)
    mergeIndex = gpd.sjoin(hsArea, cell, how='left', op='intersects').index_right.unique()
    nlGrid = cell.loc[mergeIndex].reset_index(drop=True).reset_index()

    # spatial weights matrix for the clipped grid
    wq = Queen(nlGrid.geometry)
    wq.transform = 'r'

    return nlGrid, wq


def makeMoransStatsDf_hotspotAreas(matHotspotGrids, hotspotGridsWeights):
    '''
    Compute global Moran's I for every material and cell size inside hotspot areas.

    inputs:
    matHotspotGrids (dictionary) - dictionary of geodataframes with structure:
        {'materialName': {cellsize : gdf}}; each gdf needs columns logKg and kg
    hotspotGridsWeights (dictionary) - dictionary of spatial weights for grids with structure:
        {'materialName': {cellsize : spatialWeightsMatrix}}
    important note: materials and cell sizes should match in matHotspotGrids
    and hotspotGridsWeights

    returns:
    miStatsDf (dataframe) - one row per (material, cell size) with columns
        cell_size, p-value, mi_obs, mi_sim, kgTot, percZero and mat
    '''
    perMaterial = []

    for mat, grids in matHotspotGrids.items():
        stats = {}
        for cellSize, matGrid in grids.items():
            y = matGrid.logKg
            # weights are stored per material here, not just per cell size
            wq = hotspotGridsWeights[mat][cellSize]

            # reseed before every run so the simulated statistics are repeatable
            np.random.seed(12345)
            mi = esda.moran.Moran(y, wq)

            nZero = len(matGrid[matGrid.kg == 0])
            stats[cellSize] = {
                'p-value': mi.p_sim,
                'mi_obs': mi.I,
                'mi_sim': mi.EI_sim,
                'kgTot': matGrid.kg.sum(),
                # share of cells without any material, in percent
                'percZero': round(nZero / len(matGrid) * 100, 1),
            }

        # rows = cell sizes, columns = statistics
        matDf = pd.DataFrame(stats).T
        matDf['mat'] = mat
        perMaterial.append(matDf)

    # DataFrame.append no longer exists in pandas 2.x; concatenate once instead
    miStatsDf = pd.concat(perMaterial) if perMaterial else pd.DataFrame({})

    # the cell sizes sit in the index: move them into a regular column
    miStatsDf = miStatsDf.reset_index().rename(columns={'index': 'cell_size'})

    return miStatsDf











