import pandas as pd
import glob
import os
import plotly.graph_objects as go
import json
import re
from datetime import datetime

def analyze_game_schedules():
    """Generate ``game_frequency_chart.html`` from the CSVs in ``schedules/``.

    Pipeline:

    1. Read every ``schedules/*.csv`` file except ``2020sched-orig.csv`` and
       rename its headers to the unified names (Date, Visitor, Home, ...).
    2. Normalise the relevant columns to stripped strings and fold the
       ``FLO`` team code into ``MIA``.
    3. Drop rows marked "No makeup played", "Not made up" or
       "Not rescheduled".
    4. Pick each game's play date (the 8-digit makeup date when one is
       present, otherwise the scheduled date) and convert its month/day to a
       1-365 day-of-year index; Feb 29 and unparseable dates are dropped.
    5. Embed ``[Year, DayOfYear, VisitorID, HomeID]`` records and the lookup
       tables in a Plotly-based HTML page written to the current directory.

    Returns:
        None. Prints a message and returns early when no usable schedule
        file is found.
    """
    # 1. Define aliases for columns to handle header variations across years
    column_aliases = {
        'Date': ['Date', 'date', 'DATE'],
        'Num': ['Num', 'Number', 'game_num', 'Number of Game'],
        'Day': ['Day', 'day', 'DAY'],
        'Visitor': ['Visitor', 'Visiting Team', 'visitor', 'Vis', 'VISITOR'],
        'Home': ['Home', 'Home Team', 'home', 'Hm', 'HOME'],
        'Postponed': ['Postponed', 'postponed', 'Postponement', 'POSTPONED'],
        'Makeup': ['Makeup', 'makeup', 'Makeup Date', 'MAKEUP'],
    }

    # 2. Find all CSV files in the schedules folder
    all_files = glob.glob("schedules/*.csv")
    files_to_process = [f for f in all_files if not f.endswith("2020sched-orig.csv")]

    df_list, modern_teams = _load_schedule_frames(files_to_process, column_aliases)

    if not df_list:
        print("No data found to analyze.")
        return

    # 3. Combine into one master dataframe
    master_df = pd.concat(df_list, ignore_index=True, sort=False)

    # 4. Standardize all data: fill NA, convert to string, strip whitespace,
    #    and remove a trailing ".0" left behind by float-parsed columns.
    for col in ('Postponed', 'Makeup', 'Date', 'Visitor', 'Home'):
        if col in master_df.columns:
            master_df[col] = (
                master_df[col]
                .fillna('')
                .astype(str)
                .str.strip()
                .str.replace(r'\.0$', '', regex=True)
            )

    # FRANCHISE CONSOLIDATION: map the historical FLO code to the modern MIA.
    # Visitor and Home always exist here because the loader requires them.
    franchise_map = {'FLO': 'MIA'}
    for side in ('Visitor', 'Home'):
        master_df[side] = master_df[side].replace(franchise_map)

    # 5. FILTERING: remove games explicitly cancelled or never made up.
    cancel_markers = {
        'Postponed': ("No makeup played",),
        'Makeup': ("Not made up", "Not rescheduled"),
    }
    cancel_mask = pd.Series(False, index=master_df.index)
    for col, phrases in cancel_markers.items():
        if col not in master_df.columns:
            continue
        for phrase in phrases:
            cancel_mask |= master_df[col].str.contains(
                phrase, case=False, na=False, regex=False
            )

    # .copy() so the column assignments below act on an independent frame.
    master_df = master_df[~cancel_mask].copy()

    # 6. ROBUST DATE EXTRACTION: take the first 8 consecutive digits.
    clean_date = master_df['Date'].map(_extract_8_digit_date)
    if 'Makeup' in master_df.columns:
        clean_makeup = master_df['Makeup'].map(_extract_8_digit_date)
    else:
        # No file supplied a makeup column: every game keeps its scheduled date.
        clean_makeup = pd.Series("", index=master_df.index, dtype=object)

    # Makeup date takes priority, otherwise use the scheduled date.
    master_df['ActualDateStr'] = clean_makeup.where(clean_makeup != "", clean_date)

    master_df = master_df[master_df['ActualDateStr'] != ""].copy()
    master_df['Year'] = master_df['ActualDateStr'].str[:4].astype(int)
    master_df['MonthDayStr'] = master_df['ActualDateStr'].str[-4:]

    # 7. Map MonthDay to Day-of-Year (1-365) using 2023 as the reference
    #    non-leap year. The same list doubles as the tooltip labels.
    month_days = [
        datetime.strptime(f"2023 {day}", "%Y %j").strftime("%m%d")
        for day in range(1, 366)
    ]
    day_of_year = {mmdd: idx for idx, mmdd in enumerate(month_days, start=1)}

    # "0229" and any impossible month/day are absent from the table, so they
    # map to NaN and are dropped; this keeps the 365-day axis consistent.
    master_df['DayOfYear'] = master_df['MonthDayStr'].map(day_of_year)
    master_df = master_df.dropna(subset=['DayOfYear']).copy()
    # The NaNs above may have forced a float dtype; the JS side indexes arrays
    # with this value, so store it as an integer.
    master_df['DayOfYear'] = master_df['DayOfYear'].astype(int)

    # 8. Prepare Team Mapping for JS (ID 0 is reserved for "unknown/other").
    team_codes = set(master_df['Visitor']) | set(master_df['Home'])
    all_teams_global = sorted(t for t in team_codes if len(t) == 3)
    team_to_id = {team: idx for idx, team in enumerate(all_teams_global, start=1)}

    for side in ('Visitor', 'Home'):
        master_df[f'{side}ID'] = master_df[side].map(team_to_id).fillna(0).astype(int)

    # 9. JSON preparation: [Year, DayOfYear, VisitorID, HomeID]
    js_records = master_df[['Year', 'DayOfYear', 'VisitorID', 'HomeID']].values.tolist()

    # Era definitions for the sidebar
    era_definitions = [
        ("All Time", range(1800, 2100)),
        ("Pre-modern Era (1876–1900)", range(1876, 1901)),
        ("Birth of Modern Era (1901–1919)", range(1901, 1920)),
        ("Segregation Era (1920–1946)", range(1920, 1947)),
        ("Integration Era (1947–1960)", range(1947, 1961)),
        ("First Expansion (1961–1968)", range(1961, 1969)),
        ("Birth of Division Play (1969–1993)", range(1969, 1994)),
        ("Wild Card Era (1994–Present)", range(1994, 2100)),
        ("30 Teams, 162 Games (1998–Present)", range(1998, 2100)),
    ]

    era_labels = [label for label, _ in era_definitions]
    # Inclusive [first_year, last_year] pairs for the JS range filter.
    era_ranges = [[span[0], span[-1]] for _, span in era_definitions]

    # JSON payloads - years are REVERSED (newest first) for the UI list
    final_html = _render_dashboard_html(
        records_json=json.dumps(js_records),
        years_json=json.dumps(
            [int(y) for y in sorted(master_df['Year'].unique(), reverse=True)]
        ),
        eras_json=json.dumps(era_labels),
        era_ranges_json=json.dumps(era_ranges),
        all_teams_json=json.dumps(all_teams_global),
        # Fall back to every known code when no 2025 file supplied the list.
        modern_teams_json=json.dumps(modern_teams if modern_teams else all_teams_global),
        month_days_json=json.dumps(month_days),
    )

    with open('game_frequency_chart.html', 'w', encoding='utf-8') as f:
        f.write(final_html)

    print("Data extraction refined: Official games count verified with improved cancellation filtering.")


def _load_schedule_frames(file_paths, column_aliases):
    """Read schedule CSVs and return them with unified column names.

    Args:
        file_paths: Paths of the CSV files to read.
        column_aliases: Mapping of unified column name -> accepted header
            spellings; the first spelling present in a file is used.

    Returns:
        A ``(frames, modern_teams)`` tuple. ``frames`` holds one DataFrame per
        usable file, restricted to the recognised columns and with rows
        lacking a Date removed. ``modern_teams`` is the sorted list of
        3-character team codes found in a file whose name contains "2025"
        (empty when no such file was read).
    """
    required = {'Visitor', 'Home', 'Date'}
    frames = []
    modern_teams = []

    for file_path in file_paths:
        # Best-effort ingest: one unreadable file must not abort the whole run.
        try:
            raw = pd.read_csv(file_path, low_memory=False)

            # Clean column names immediately
            raw.columns = [str(c).strip() for c in raw.columns]

            # Map the header actually present in this file -> unified name.
            rename_map = {}
            for official, aliases in column_aliases.items():
                found = next((alias for alias in aliases if alias in raw.columns), None)
                if found is not None:
                    rename_map[found] = official

            # rename_map is keyed by the file's own header, so the required
            # unified names must be looked up among its values.
            if not required.issubset(rename_map.values()):
                print(f"Skipping {file_path}: missing required Date/Visitor/Home columns")
                continue

            clean = raw[list(rename_map)].rename(columns=rename_map)

            # Ensure the Date column has data
            clean = clean.dropna(subset=['Date'])
            frames.append(clean)

            # Extract modern teams (30 franchises) from the 2025 file
            if "2025" in os.path.basename(file_path):
                seen = set(clean['Visitor'].dropna()) | set(clean['Home'].dropna())
                codes = {str(team).strip() for team in seen}
                modern_teams = sorted(code for code in codes if len(code) == 3)

        except Exception as e:
            print(f"Skipping {file_path} due to error: {e}")

    return frames, modern_teams


def _extract_8_digit_date(val):
    """Return the first run of 8 consecutive digits in *val*, or "" if none."""
    match = re.search(r'(\d{8})', val)
    return match.group(1) if match else ""


def _render_dashboard_html(records_json, years_json, eras_json, era_ranges_json,
                           all_teams_json, modern_teams_json, month_days_json):
    """Return the dashboard page with the JSON payloads spliced into its script.

    Every argument is an already-serialised JSON string that is inserted
    verbatim as a JavaScript literal.
    """
    # 10. GENERATE HTML
    return f"""
    <!DOCTYPE html>
    <html>
    <head>
        <title>Baseball Schedule Analytics</title>
        <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
        <style>
            body {{ font-family: 'Segoe UI', sans-serif; margin: 0; background: #f0f2f5; display: flex; flex-direction: column; height: 100vh; overflow: hidden; }}
            #header {{ padding: 12px 25px; background: #1a2a3a; color: white; display: flex; justify-content: space-between; align-items: center; z-index: 100; flex-shrink: 0; }}
            #main-container {{ display: flex; flex: 1; overflow: hidden; min-height: 0; }}
            #sidebar {{ width: 300px; background: white; border-right: 1px solid #ddd; display: flex; flex-direction: column; overflow: hidden; flex-shrink: 0; }}
            .sidebar-section {{ padding: 15px; border-bottom: 2px solid #eee; display: flex; flex-direction: column; overflow: hidden; }}
            .sidebar-section.active {{ background: #f8fbff; flex: 1; }}
            .comparison-active-sidebar .sidebar-section {{ flex: 1; }}
            #chart-area {{ flex: 1; display: flex; flex-direction: column; padding: 15px; gap: 15px; overflow: hidden; min-height: 0; }}
            #chart-area.comparison-active {{ flex-direction: row; }}
            .chart-wrapper {{ background: white; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); flex: 1; display: flex; flex-direction: column; overflow: hidden; min-width: 0; }}
            .chart-div {{ flex: 1; width: 100%; min-height: 0; }}
            .chart-header {{ padding: 10px 15px; background: #fafafa; border-bottom: 1px solid #eee; display: flex; flex-direction: column; font-size: 13px; border-radius: 8px 8px 0 0; flex-shrink: 0; }}
            .header-top {{ display: flex; justify-content: space-between; align-items: center; font-weight: bold; margin-bottom: 4px; }}
            .header-stats {{ font-size: 11px; color: #666; font-weight: bold; display: flex; justify-content: space-between; align-items: center; }}
            .header-select-container {{ display: flex; align-items: center; gap: 5px; background: #eee; padding: 2px 6px; border-radius: 4px; margin: 0 10px; }}
            .header-team-select {{ font-size: 11px; border: none; background: transparent; cursor: pointer; outline: none; font-weight: bold; }}
            .control-group {{ margin-bottom: 12px; flex-shrink: 0; }}
            .control-header {{ display: flex; justify-content: space-between; align-items: center; margin-bottom: 8px; }}
            label {{ display: block; font-size: 11px; font-weight: bold; color: #555; text-transform: uppercase; }}
            select {{ width: 100%; padding: 6px; border-radius: 4px; border: 1px solid #ccc; font-size: 13px; background: white; }}
            .year-list-container {{ flex: 1; overflow-y: auto; border: 1px solid #eee; border-radius: 4px; padding: 10px; background: #fafafa; min-height: 100px; }}
            .year-item {{ display: flex; align-items: center; font-size: 12px; margin-bottom: 4px; }}
            .year-item input {{ margin-right: 8px; }}
            .btn {{ padding: 6px 12px; border-radius: 4px; border: none; cursor: pointer; font-size: 13px; font-weight: bold; transition: opacity 0.2s; }}
            .btn-blue {{ background: #3498db; color: white; }}
            .btn-red {{ background: #e74c3c; color: white; }}
            .btn-green {{ background: #2ecc71; color: white; }}
            .btn-outline {{ padding: 3px 8px; background: white; border: 1px solid #ddd; font-size: 10px; color: #666; }}
            .hidden {{ display: none !important; }}
            .year-list-container::-webkit-scrollbar {{ width: 6px; }}
            .year-list-container::-webkit-scrollbar-thumb {{ background: #ccc; border-radius: 3px; }}
            .sync-toggle {{ display: flex; align-items: center; gap: 5px; font-size: 10px; color: #444; cursor: pointer; }}
            .sync-toggle input {{ cursor: pointer; margin: 0; }}
        </style>
    </head>
    <body>
        <div id="header">
            <div style="font-size: 18px; font-weight: bold;">⚾ All-time (1877 - 2025) MLB Regular Season Schedule Frequency Analyzer</div>
            <div style="display: flex; gap: 10px;">
                <button id="add-chart-btn" class="btn btn-green" onclick="toggleComparisonMode()">Add Comparison Chart</button>
                <button class="btn btn-red" onclick="resetAll()">Global Reset</button>
            </div>
        </div>
        <div id="main-container">
            <div id="sidebar">
                <div style="padding: 12px 15px; border-bottom: 1px solid #ddd; font-weight: bold; background: #f4f7f6; font-size: 14px; flex-shrink: 0;">Dashboard Controls</div>
                <div id="controls-A" class="sidebar-section active">
                    <div class="control-header">
                        <div style="color: #3498db; font-size: 13px; font-weight: bold;">CHART A CONFIG</div>
                        <button class="btn btn-outline" onclick="resetChart('A')">Reset</button>
                    </div>
                    <div class="control-group">
                        <label>Select Era</label>
                        <select id="era-select-A" onchange="handleEraDropdown('A')"></select>
                    </div>
                    <div style="font-size: 10px; font-weight: bold; color: #888; text-align: center; margin: 5px 0;">OR</div>
                    <label>Stack Individual Years (Max 10)</label>
                    <div id="year-list-A" class="year-list-container"></div>
                </div>
                <div id="controls-B" class="sidebar-section hidden">
                    <div class="control-header">
                        <div style="color: #e67e22; font-size: 13px; font-weight: bold;">CHART B CONFIG</div>
                        <button class="btn btn-outline" onclick="resetChart('B')">Reset</button>
                    </div>
                    <div class="control-group">
                        <label>Select Era</label>
                        <select id="era-select-B" onchange="handleEraDropdown('B')"></select>
                    </div>
                    <div style="font-size: 10px; font-weight: bold; color: #888; text-align: center; margin: 5px 0;">OR</div>
                    <label>Stack Individual Years (Max 10)</label>
                    <div id="year-list-B" class="year-list-container"></div>
                </div>
            </div>
            <div id="chart-area">
                <div id="wrapper-A" class="chart-wrapper">
                    <div class="chart-header">
                        <div class="header-top">
                            <span>CHART A</span>
                            <div class="header-select-container">
                                <label style="font-size: 9px; margin: 0; color: #888;">TEAM:</label>
                                <select id="team-select-A" class="header-team-select" onchange="updateChart('A')"></select>
                            </div>
                            <span id="title-A" style="color: #3498db; flex: 1; text-align: right; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;">All Time</span>
                        </div>
                        <div class="header-stats" id="stats-A">Total Games: 0</div>
                    </div>
                    <div id="plotly-A" class="chart-div"></div>
                </div>
                <div id="wrapper-B" class="chart-wrapper hidden">
                    <div class="chart-header">
                        <div class="header-top">
                            <span>CHART B</span>
                            <div class="header-select-container">
                                <label style="font-size: 9px; margin: 0; color: #888;">TEAM:</label>
                                <select id="team-select-B" class="header-team-select" onchange="updateChart('B')"></select>
                            </div>
                            <span id="title-B" style="color: #e67e22; flex: 1; text-align: right; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;">All Time</span>
                        </div>
                        <div class="header-stats">
                            <span id="stats-B">Total Games: 0</span>
                            <label class="sync-toggle"><input type="checkbox" id="sync-y-axis" onchange="handleSyncChange()"> Same y-axis</label>
                        </div>
                    </div>
                    <div id="plotly-B" class="chart-div"></div>
                </div>
            </div>
        </div>
        <script>
            const masterRecords = {records_json}; 
            const years = {years_json};
            const eras = {eras_json};
            const eraRanges = {era_ranges_json}; 
            const allTeamCodes = {all_teams_json}; 
            const modernTeams = {modern_teams_json}; 
            const monthDays = {month_days_json};

            let isComparisonMode = false;
            let syncYAxis = false;
            let chartStates = {{ 
                'A': {{ selectedYears: [], eraIdx: 0, currentData: [], currentRecent: [], currentLabel: "", maxVal: 0 }}, 
                'B': {{ selectedYears: [], eraIdx: 0, currentData: [], currentRecent: [], currentLabel: "", maxVal: 0 }} 
            }};

            function init() {{
                ['A', 'B'].forEach(id => {{
                    const eraSelect = document.getElementById(`era-select-${{id}}`);
                    eras.forEach((name, i) => {{
                        const opt = document.createElement('option');
                        opt.value = i; opt.textContent = name;
                        eraSelect.appendChild(opt);
                    }});
                    const teamSelect = document.getElementById(`team-select-${{id}}`);
                    const defaultOpt = document.createElement('option');
                    defaultOpt.value = "0"; defaultOpt.textContent = "All Teams";
                    teamSelect.appendChild(defaultOpt);
                    modernTeams.forEach((code) => {{
                        const globalID = allTeamCodes.indexOf(code) + 1;
                        if (globalID > 0) {{
                            const opt = document.createElement('option');
                            opt.value = globalID.toString(); 
                            opt.textContent = code;
                            teamSelect.appendChild(opt);
                        }}
                    }});
                    const yearList = document.getElementById(`year-list-${{id}}`);
                    years.forEach((y, i) => {{
                        const div = document.createElement('div');
                        div.className = 'year-item';
                        div.innerHTML = `<input type="checkbox" class="cb-${{id}}" id="y-${{id}}-${{y}}" onchange="handleYearToggle('${{id}}', ${{i}})"> <label for="y-${{id}}-${{y}}">${{y}}</label>`;
                        yearList.appendChild(div);
                    }});
                }});
                updateChart('A');
                updateChart('B');
            }}

            function toggleComparisonMode() {{
                isComparisonMode = !isComparisonMode;
                const area = document.getElementById('chart-area');
                const sidebar = document.getElementById('sidebar');
                const btn = document.getElementById('add-chart-btn');
                const wrapB = document.getElementById('wrapper-B');
                const ctrlB = document.getElementById('controls-B');
                if (isComparisonMode) {{
                    area.classList.add('comparison-active');
                    sidebar.classList.add('comparison-active-sidebar');
                    wrapB.classList.remove('hidden');
                    ctrlB.classList.remove('hidden');
                    btn.textContent = "Remove Comparison";
                    btn.classList.replace('btn-green', 'btn-red');
                }} else {{
                    area.classList.remove('comparison-active');
                    sidebar.classList.remove('comparison-active-sidebar');
                    wrapB.classList.add('hidden');
                    ctrlB.classList.add('hidden');
                    btn.textContent = "Add Comparison Chart";
                    btn.classList.replace('btn-red', 'btn-green');
                    if(syncYAxis) {{
                        document.getElementById('sync-y-axis').checked = false;
                        syncYAxis = false;
                    }}
                }}
                setTimeout(() => {{ 
                    Plotly.Plots.resize('plotly-A'); 
                    if (isComparisonMode) Plotly.Plots.resize('plotly-B'); 
                    refreshBothCharts();
                }}, 10);
            }}

            function handleEraDropdown(id) {{
                const cbs = document.querySelectorAll(`.cb-${{id}}`);
                cbs.forEach(cb => cb.checked = false);
                chartStates[id].selectedYears = [];
                updateChart(id);
            }}

            function handleYearToggle(id, yearIdx) {{
                const cbs = Array.from(document.querySelectorAll(`.cb-${{id}}:checked`));
                if (cbs.length > 10) {{
                    document.getElementById(`y-${{id}}-${{years[yearIdx]}}`).checked = false;
                    alert("Maximum 10 years per chart.");
                    return;
                }}
                chartStates[id].selectedYears = cbs.map(cb => {{
                    const val = parseInt(cb.id.split('-').pop());
                    return years.indexOf(val);
                }});
                updateChart(id);
            }}

            function resetChart(id) {{
                document.getElementById(`era-select-${{id}}`).value = "0";
                document.getElementById(`team-select-${{id}}`).value = "0";
                const cbs = document.querySelectorAll(`.cb-${{id}}`);
                cbs.forEach(cb => cb.checked = false);
                chartStates[id].selectedYears = [];
                updateChart(id);
            }}

            function handleSyncChange() {{
                syncYAxis = document.getElementById('sync-y-axis').checked;
                refreshBothCharts();
            }}

            function refreshBothCharts() {{
                if (syncYAxis && isComparisonMode) {{
                    const globalMax = Math.max(chartStates['A'].maxVal, chartStates['B'].maxVal);
                    renderPlotly('A', chartStates['A'].currentData, chartStates['A'].currentRecent, chartStates['A'].currentLabel, globalMax);
                    renderPlotly('B', chartStates['B'].currentData, chartStates['B'].currentRecent, chartStates['B'].currentLabel, globalMax);
                }} else {{
                    renderPlotly('A', chartStates['A'].currentData, chartStates['A'].currentRecent, chartStates['A'].currentLabel, chartStates['A'].maxVal);
                    if(isComparisonMode) renderPlotly('B', chartStates['B'].currentData, chartStates['B'].currentRecent, chartStates['B'].currentLabel, chartStates['B'].maxVal);
                }}
            }}

            function updateChart(id) {{
                const state = chartStates[id];
                const eraIdx = parseInt(document.getElementById(`era-select-${{id}}`).value);
                const teamID = parseInt(document.getElementById(`team-select-${{id}}`).value);
                let filtered;
                let label = "";

                if (state.selectedYears.length > 0) {{
                    const activeYears = state.selectedYears.map(idx => years[idx]);
                    filtered = masterRecords.filter(r => activeYears.includes(r[0]));
                    const yearLabels = activeYears.sort();
                    label = yearLabels.length > 3 ? `${{yearLabels[0]}}...${{yearLabels[yearLabels.length-1]}}` : yearLabels.join(", ");
                }} else {{
                    const range = eraRanges[eraIdx];
                    filtered = masterRecords.filter(r => r[0] >= range[0] && r[0] <= range[1]);
                    label = eras[eraIdx];
                }}

                if (teamID !== 0) {{
                    filtered = filtered.filter(r => r[2] === teamID || r[3] === teamID);
                    label += ` (${{allTeamCodes[teamID - 1]}})`;
                }}

                const counts = new Array(365).fill(0);
                const recents = new Array(365).fill(0);
                filtered.forEach(r => {{
                    const dayIdx = r[1] - 1;
                    counts[dayIdx]++;
                    recents[dayIdx] = Math.max(recents[dayIdx], r[0]);
                }});
                
                state.currentData = counts;
                state.currentRecent = recents;
                state.currentLabel = label;
                state.maxVal = Math.max(...counts);
                const totalGames = counts.reduce((a, b) => a + b, 0);
                document.getElementById(`stats-${{id}}`).textContent = `Total Games: ${{totalGames.toLocaleString()}}`;
                refreshBothCharts();
            }}

            function renderPlotly(id, data, recent, label, maxForAxis) {{
                const target = `plotly-${{id}}`;
                document.getElementById(`title-${{id}}`).textContent = label;
                const yLim = maxForAxis > 0 ? maxForAxis * 1.1 : 10;
                const gapData = data.map(v => v === 0 ? yLim : 0);
                const traceGap = {{
                    x: Array.from({{length: 365}}, (_, i) => i + 1),
                    y: gapData, type: 'bar', marker: {{ color: 'rgba(230, 230, 230, 0.4)' }}, hoverinfo: 'skip'
                }};
                const traceMain = {{
                    x: Array.from({{length: 365}}, (_, i) => i + 1),
                    y: data, type: 'bar', marker: {{ color: id === 'A' ? '#3498db' : '#e67e22' }},
                    text: monthDays, customdata: recent,
                    hovertemplate: "<b>Date:</b> %{{text}}<br><b>Games:</b> %{{y}}<br><b>Recent:</b> %{{customdata}}<extra></extra>"
                }};
                const layout = {{
                    xaxis: {{ tickmode: 'array', tickvals: [1, 32, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335], 
                             ticktext: ['J', 'F', 'M', 'A', 'M', 'J', 'J', 'A', 'S', 'O', 'N', 'D'], fixedrange: false }},
                    yaxis: {{ range: [0, yLim], fixedrange: false }},
                    template: 'plotly_white', barmode: 'overlay', showlegend: false,
                    margin: {{ t: 20, b: 30, l: 30, r: 10 }}, autosize: true
                }};
                Plotly.react(target, [traceGap, traceMain], layout);
            }}

            function resetAll() {{
                ['A', 'B'].forEach(id => resetChart(id));
                if (isComparisonMode) toggleComparisonMode();
            }}
            window.onload = init;
        </script>
    </body>
    </html>
    """

# Script entry point: build the dashboard only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    analyze_game_schedules()