
import pandas as pd
import sys

"""
Simulating/estimating discrete choice experiments within sfctools using the Swissmetro data.

NOTE: before running this script, download the Swissmetro data from
https://transp-or.epfl.ch/pythonbiogeme/examples/swissmetro/swissmetro.dat
and place it in the working directory as 'swissmetro.dat'.
"""


def prepare_swissmetro(df: pd.DataFrame) -> pd.DataFrame:
    """Filter and transform a raw Swissmetro frame for model fitting.

    The input frame is not modified; a new frame is returned.

    Steps:
        * drop rows whose ``CHOICE`` is not positive (unknown choice) and
          rows containing NaNs,
        * rename ``CHOICE`` to ``choice`` and replace the codes 1/2/3 by the
          labels ``"TRAIN"``/``"SM"``/``"CAR"``,
        * add the indicator columns ``free_ticket``, ``single_luggage``,
          ``multi_luggage``, ``regular_class`` and ``train_survey``,
        * divide the ``*_CO`` cost columns by 100 and the ``*_TT`` travel
          time columns by 60 (minutes to hours).

    Args:
        df: raw data with at least the columns ``CHOICE``, ``GA``, ``WHO``,
            ``LUGGAGE``, ``FIRST``, ``SURVEY``, ``TRAIN_CO``, ``SM_CO``,
            ``CAR_CO``, ``TRAIN_TT``, ``SM_TT`` and ``CAR_TT``.

    Returns:
        The filtered frame with the transformed and added columns.
    """
    # drop unknown choices and NaNs; rename() hands back a fresh frame, so
    # the column assignments below never touch the caller's data
    df = df[df["CHOICE"] > 0].dropna().rename(columns={"CHOICE": "choice"})

    # Relabel the whole column in one step. Writing strings into the integer
    # column piecewise through .loc relies on silent dtype upcasting, which
    # pandas has deprecated. Codes other than 1/2/3 would become NaN here.
    choice_labels = {1: "TRAIN", 2: "SM", 3: "CAR"}
    df["choice"] = df["choice"].map(choice_labels)

    # transform features
    # NOTE(review): free_ticket depends on the observed choice, so TRAIN_CO
    # below is zeroed only for respondents who chose TRAIN or SM -- confirm
    # that this is intended.
    eligible = (df["GA"] == 1) | (df["WHO"] == 2)
    chose_train_or_sm = df["choice"].isin(["TRAIN", "SM"])
    df["free_ticket"] = (eligible & chose_train_or_sm).astype(int)

    df["single_luggage"] = (df["LUGGAGE"] == 1).astype(int)
    df["multi_luggage"] = (df["LUGGAGE"] == 3).astype(int)
    df["regular_class"] = 1 - df["FIRST"]
    df["train_survey"] = 1 - df["SURVEY"]

    # re-scale travel cost; the train cost is set to zero for free tickets
    df["TRAIN_CO"] = df["TRAIN_CO"] * (df["free_ticket"] == 0) / 100.0
    for cost_column in ("CAR_CO", "SM_CO"):
        df[cost_column] = df[cost_column] / 100

    # convert travel time from minutes to hours
    for time_column in ("TRAIN_TT", "CAR_TT", "SM_TT"):
        df[time_column] = df[time_column] / 60

    return df


def main(data_path: str = "swissmetro.dat"):
    """Fit a nested logit model from sfctools on the Swissmetro data.

    Args:
        data_path: path of the tab-separated Swissmetro data file.

    Returns:
        The ``(result, stats)`` pair returned by ``model.fit``.

    Raises:
        FileNotFoundError: if ``data_path`` does not exist; the message
            names the download location of the data set.
    """
    # imported here so that importing this module does not require sfctools
    from sfctools.ml_models.nested_logit import MultinomialNestedLogitModel as MNL

    # read swissmetro data
    try:
        raw = pd.read_csv(data_path, sep='\t')
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"{data_path!r} not found; download the swissmetro data from "
            "https://transp-or.epfl.ch/pythonbiogeme/examples/swissmetro/"
            "swissmetro.dat"
        ) from err

    df = prepare_swissmetro(raw)
    print(df.head().T)

    # Define nesting and utility components.
    #
    # Alternative-specific features are:
    #     travel time (_TT)
    #     travel costs (_CO)
    #     headway (_HE)
    #     seat configuration in Swiss Metro (SM_SEATS)
    #
    # Individual-specific features are:
    #     free_ticket: person has no marginal ticket costs
    #     single_luggage: person has a single luggage piece
    #     multi_luggage: person has multiple luggage pieces
    #     regular_class: not first class traveller
    #     train_survey: survey was taken aboard a train
    nesting = {
        "TRAIN": ["TRAIN_TT", "TRAIN_CO", "TRAIN_HE"],
        "SM": ["SM_TT", "SM_CO", "SM_HE", "SM_SEATS"],
        "CAR": ["CAR_TT", "CAR_CO"],
    }

    model = MNL(nesting)
    print(model.param_vector)

    # tip: model.visualize_tree() visualizes the model tree as networkx graph

    # set parameter bounds
    # NOTE(review): the (0, 0) interval presumably pins that coefficient to
    # zero -- confirm against the sfctools documentation.
    u = 200.0
    bounds = {
        'beta_TRAIN_TRAIN_CO': (0, 0),
        'beta_SM_SM_TT': (-u, u),
        'beta_CAR_CAR_CO': (-u, u),
        'asc_2': (-100, 100),
    }

    # fit the model
    result, stats = model.fit(
        df, sampling_method='lhs', num_samples_per_param=1000,
        bounds=bounds, optimize_globally=True,
        regularization="l2", alpha_2=.1)

    return result, stats


if __name__ == "__main__":
    result, stats = main()
