import pandas as pd

from ...client.kawa_client import KawaClient as K
from datetime import datetime, date, timedelta
import numpy as np
from faker import Faker


def kawa(
    kawa_api_url='http://localhost:8080',
    api_key_file='/Users/emmanuel/doc/local-pristine/.key',
    workspace_id='109',
):
    """Build a KawaClient with its API key and active workspace set.

    The defaults reproduce the values that were previously hard-coded, so a
    bare ``kawa()`` call behaves exactly as before; pass explicit values to
    target another server, key file or workspace.

    Args:
        kawa_api_url: URL handed to the ``KawaClient`` constructor.
        api_key_file: Path passed to ``set_api_key`` as ``api_key_file``.
        workspace_id: Identifier passed to ``set_active_workspace_id``.

    Returns:
        The configured ``KawaClient`` instance.
    """
    client = K(kawa_api_url=kawa_api_url)
    client.set_api_key(api_key_file=api_key_file)
    client.set_active_workspace_id(workspace_id=workspace_id)
    return client


# Open the research that the models, results and report below are attached to.
research = kawa().research('7-day and 15-day Retention Analysis for Last Two Weeks')

# Gather the model
interactions_model = research.register_model(model_id='3677')

## PHASE 1: Model building

# Calculated metrics added to the interactions model, as (name, formula)
# pairs. They are created in this order; the two retention flags are written
# in terms of "days_since_join", which is therefore listed before them.
retention_metric_definitions = (
    # Week of joining.
    ('join_week_start', 'DATE_SUB("join_date", INTERVAL DAYOFWEEK("join_date") - 1 DAY)'),
    # Days elapsed between the join date and the interaction.
    ('days_since_join', 'DATEDIFF("Interaction_date", "join_date")'),
    # 1 when the interaction happened 7 or more days after joining, else 0.
    ('is_7day_retained', 'CASE WHEN "days_since_join" >= 7 THEN 1 ELSE 0 END'),
    # 1 when the interaction happened 15 or more days after joining, else 0.
    ('is_15day_retained', 'CASE WHEN "days_since_join" >= 15 THEN 1 ELSE 0 END'),
)

for metric_name, metric_formula in retention_metric_definitions:
    interactions_model.create_metric(name=metric_name, formula=metric_formula)

research.publish_models()

## PHASE 2: Model querying and analysis

# First, get the most recent interaction date to determine "last week" and "week before"
max_date_df = (
    interactions_model
    .select(K.col('Interaction_date').max().alias('max_date'))
    .no_limit()
    .collect()
)

# Fail with an explicit message instead of an opaque IndexError/AttributeError
# when the model holds no interaction at all.
if max_date_df.empty or pd.isna(max_date_df['max_date'].iloc[0]):
    raise ValueError('No interaction date found in the model; cannot derive the join cohorts.')
max_date = max_date_df['max_date'].iloc[0]

# Cohort windows, both anchored on Monday (weekday() == 0):
#   "Last Week"   = the full Monday-Sunday week preceding the week of max_date
#   "Week Before" = the full Monday-Sunday week preceding "Last Week"
# `timedelta` comes from the top-of-file `from datetime import ...` line, so no
# mid-script re-import of the `datetime` module is needed.
last_week_start = max_date - timedelta(days=max_date.weekday() + 7)
last_week_end = last_week_start + timedelta(days=6)
week_before_start = last_week_start - timedelta(days=7)
week_before_end = last_week_start - timedelta(days=1)

# Get all interactions for players who joined in these two weeks.
# The upper bound is the end of last week, not max_date: otherwise players who
# joined during the current, still incomplete week would be counted in the
# "Last Week" cohort.
# NOTE(review): every column uses .first() without an explicit grouping; the
# processing below assumes this yields one row per interaction - confirm
# against the KAWA SDK.
df = (
    interactions_model
    .select(
        K.col('player_id').first(),
        K.col('join_date').first(),
        K.col('join_week_start').first(),
        K.col('Interaction_date').first(),
        K.col('days_since_join').first(),
        K.col('is_7day_retained').first(),
        K.col('is_15day_retained').first(),
    )
    .filter(K.col('join_date').date_range(from_inclusive=week_before_start, to_inclusive=last_week_end))
    .no_limit()
    .collect()
)

# Process the data to calculate retention rates

# Identify the cohort for each player. The query already restricts join dates
# to [week_before_start, last_week_end], so anything before last_week_start
# falls in [week_before_start, week_before_end].
df['cohort'] = np.where(df['join_date'] >= last_week_start, 'Last Week', 'Week Before')

# For each player, determine if they are retained: a flag is 1 as soon as one
# of the player's interactions carries it, hence the max over the rows.
player_retention = (
    df.groupby(['player_id', 'cohort', 'join_date'])
    .agg({'is_7day_retained': 'max', 'is_15day_retained': 'max'})
    .reset_index()
)

# Calculate retention rates by cohort. Named aggregation ties every output
# column to its source explicitly instead of renaming columns by position.
retention_summary = (
    player_retention.groupby('cohort')
    .agg(**{
        'Total Players': ('player_id', 'count'),
        '7-Day Retained Players': ('is_7day_retained', 'sum'),
        '15-Day Retained Players': ('is_15day_retained', 'sum'),
    })
    .reset_index()
    .rename(columns={'cohort': 'Cohort'})
)

retention_summary['7-Day Retention Rate'] = (
    retention_summary['7-Day Retained Players'] / retention_summary['Total Players'] * 100
).round(2)
retention_summary['15-Day Retention Rate'] = (
    retention_summary['15-Day Retained Players'] / retention_summary['Total Players'] * 100
).round(2)

# Reorder to show Last Week first: 'Last Week' sorts before 'Week Before'
# alphabetically, so the sort has to be ascending.
retention_summary = retention_summary.sort_values('Cohort', ascending=True)

response_model = research.publish_results(
    title='Retention Analysis by Cohort',
    df=retention_summary
)

## PHASE 3: Explain your reasoning


def _cohort_value(cohort, column):
    """Return `column` for the given cohort row of `retention_summary`, or 0 if the cohort is absent."""
    matches = retention_summary.loc[retention_summary['Cohort'] == cohort, column]
    return 0 if matches.empty else matches.iloc[0]


# Figures quoted in the report text, looked up once per cohort.
last_week_players = _cohort_value('Last Week', 'Total Players')
last_week_rate_7d = _cohort_value('Last Week', '7-Day Retention Rate')
last_week_rate_15d = _cohort_value('Last Week', '15-Day Retention Rate')
week_before_players = _cohort_value('Week Before', 'Total Players')
week_before_rate_7d = _cohort_value('Week Before', '7-Day Retention Rate')
week_before_rate_15d = _cohort_value('Week Before', '15-Day Retention Rate')

report = research.report()

report.header1('7-Day and 15-Day Retention Analysis: Last Week vs Week Before')

report.paragraph("""
The user wants to compare the retention rates of players who joined in the last week versus those who joined in the week before. Specifically, we need to calculate the 7-day and 15-day retention rates for both cohorts.
""")

report.header2('Approach used')

report.paragraph("""
To analyze retention, I enriched the data model with several calculated metrics. First, I created a metric to identify the week when each player joined (join_week_start). Then, I calculated the number of days between each interaction and the player's join date (days_since_join). Using this, I created two boolean metrics: is_7day_retained (1 if the player had an interaction 7 or more days after joining) and is_15day_retained (1 if the player had an interaction 15 or more days after joining). These metrics allow us to identify retained players based on the presence of interactions after the retention threshold.
""")

report.paragraph("""
I queried the model to extract all interactions for players who joined in the last two weeks. The data was then processed in Python to group players by their join cohort (Last Week or Week Before) and determine the maximum retention status for each player. Finally, I calculated the retention rates by dividing the number of retained players by the total number of players in each cohort. The analysis shows that among the {0} players who joined last week, {1}% were retained at 7 days and {2}% at 15 days, while among the {3} players who joined the week before, {4}% were retained at 7 days and {5}% at 15 days.
""".format(
    last_week_players,
    last_week_rate_7d,
    last_week_rate_15d,
    week_before_players,
    week_before_rate_7d,
    week_before_rate_15d,
))

report.header2('Data model')

# NOTE(review): 'OK OK' looks like a placeholder heading - confirm the intended title.
report.header3('OK OK')

report.table(
    title='Player Interactions with Retention Metrics',
    source=interactions_model,
    column_names=['player_id', 'join_date', 'Interaction_date', 'join_week_start', 'days_since_join', 'is_7day_retained', 'is_15day_retained']
)

report.paragraph("""
I added four calculated metrics to the model: join_week_start (to identify the cohort week), days_since_join (to calculate the time elapsed since joining), is_7day_retained (to flag players with interactions 7+ days after joining), and is_15day_retained (to flag players with interactions 15+ days after joining). These metrics enable the calculation of retention rates for different time periods and cohorts.
""")

report.header2('Final result')

report.paragraph("""
After querying the model and grouping by player and cohort, I calculated the retention rates for each cohort. A player is considered N-day retained if they have at least one interaction N or more days after their join date. The retention rate is the percentage of players in each cohort who meet this criterion.
""")

report.table(
    title='Retention Rates by Cohort',
    source=response_model,
    column_names=['Cohort', 'Total Players', '7-Day Retained Players', '15-Day Retained Players', '7-Day Retention Rate', '15-Day Retention Rate']
)

report.paragraph("""
The analysis shows that players who joined last week have a 7-day retention rate of {0}% and a 15-day retention rate of {1}%, while players who joined the week before have a 7-day retention rate of {2}% and a 15-day retention rate of {3}%. This comparison allows us to understand how well each cohort is retained over time.
""".format(
    last_week_rate_7d,
    last_week_rate_15d,
    week_before_rate_7d,
    week_before_rate_15d,
))

report.publish()