from datetime import datetime
import logging
import sys
from models import User, Call  # User is used by check_favorites() in this file; keep this import
from flask import flash
import os
from models import Call, Participant # Removed redundant User import
from flask import flash
from bs4 import BeautifulSoup
import json
from utils import convert_date_to_iso


def setup_logger(logs_dir="logs"):
    """Create (or reuse) the 'transcript_processor' logger.

    On success the logger writes INFO-and-above records to
    ``<directory of this module>/<logs_dir>/transcript_processing.log``.
    If the logger already has a handler, no further handler is attached, so
    repeated calls do not produce duplicate log lines.

    If the log directory or file cannot be created, the error is printed to
    stderr and, when the logger has no handler yet, a stderr handler is
    attached instead. The function therefore always returns a logger that is
    safe to call, rather than ``None``.

    Args:
        logs_dir: Directory for the log file, relative to this module's
            directory.

    Returns:
        logging.Logger: The configured 'transcript_processor' logger.
    """
    logger = logging.getLogger('transcript_processor')  # Standard logger name
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(filename)s - %(message)s')

    try:
        log_path = os.path.join(os.path.dirname(__file__), logs_dir)
        os.makedirs(log_path, exist_ok=True)
        log_file_path = os.path.join(log_path, 'transcript_processing.log')

        if not logger.handlers:  # Avoid stacking handlers on repeated calls
            handler = logging.FileHandler(log_file_path, encoding='utf-8')
            handler.setLevel(logging.INFO)
            handler.setFormatter(formatter)
            logger.addHandler(handler)
    except Exception as e:  # Best effort: logging problems must not break the import
        print(f"Error setting up logging: {e}", file=sys.stderr)
        if not logger.handlers:
            # Fall back to stderr so callers can still log without None checks.
            fallback = logging.StreamHandler(sys.stderr)
            fallback.setLevel(logging.INFO)
            fallback.setFormatter(formatter)
            logger.addHandler(fallback)

    return logger


# Create the shared transcript logger once, at import time.
transaction_logger = setup_logger()  # No argument: uses the default logs_dir ("logs")


def get_transcript_dir():
    """Return the directory that holds transcript files for this platform.

    On POSIX systems this is the ``updates`` folder next to this module.
    Every other platform is treated as Windows: the result is the
    ``Transcripts`` folder inside the path named by the ``ONEDRIVE``
    environment variable, or inside a fixed default path when that variable
    is not set.
    """
    if os.name != 'posix':
        # Anything that is not POSIX is assumed to be Windows with OneDrive.
        onedrive_root = os.environ.get('ONEDRIVE', 'C:\\Users\\marcc\\OneDrive')
        return os.path.join(onedrive_root, 'Transcripts')

    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, 'updates')


def manage_transcripts(db, number_to_process, offset=0):
    """Parse transcript files from disk and store new ones as Call records.

    The entries of the transcript directory are sorted by name, the first
    ``offset`` entries are dropped, and at most ``number_to_process`` of the
    remaining ``.txt`` files are examined. Each examined file ends up in one
    of three groups:

    * bad: parsing raised, or any parsed field was missing or empty;
    * skipped: a Call with the same symbol, company name, date, quarter and
      year already exists;
    * added: a new Call with its Participants is added to the session.

    The session is committed each time the examined-file counter reaches a
    multiple of 100 and once more at the end. A summary is reported through
    ``flash``.

    Args:
        db: Object exposing a SQLAlchemy ``session`` (``add`` / ``commit``).
        number_to_process: Maximum number of ``.txt`` files to examine.
        offset: Number of leading directory entries (of any kind) to skip
            after sorting.

    Returns:
        list: Symbols of the newly added calls, without duplicates, in the
        order they were first seen.
    """
    transcript_dir = get_transcript_dir()
    transaction_logger.info(f"Transcript directory: {transcript_dir}")
    processed_symbols = []
    seen_symbols = set()  # O(1) duplicate check; the list preserves first-seen order
    num_processed = 0
    num_skipped = 0
    bad_files = 0

    # os.listdir() returns entries in arbitrary order. Sort before slicing so
    # that a given offset always refers to the same files from run to run.
    files = sorted(os.listdir(transcript_dir))[offset:]

    for filename in files:
        if not filename.endswith(".txt"):
            continue

        # Counts every .txt file examined, including bad and skipped ones.
        num_processed += 1
        if num_processed > number_to_process:
            break

        if num_processed % 100 == 0:
            # Periodic commit so a long run does not pile up pending rows.
            print(f"Processed {num_processed} files...")
            db.session.commit()

        file_path = os.path.join(transcript_dir, filename)

        try:
            result = parse_transcript(file_path)
        except Exception as e:
            # One unreadable file must not abort the whole batch.
            transaction_logger.error(f"Error parsing file: {filename}, Error: {str(e)}")
            bad_files += 1
            continue

        # Defensive guard: parse_transcript as defined in this file always
        # returns a 7-tuple (with None fields for unavailable pages), which is
        # rejected by the missing-data check below.
        if result is None:
            transaction_logger.warning(f"Skipping file due to parsing issues (Page Unavailable or critical error): {filename}")
            bad_files += 1
            continue

        json_transcript, participants, symbol, companyname, date, quarter, year = result

        if not all([json_transcript, participants, symbol, companyname, date, quarter, year]):
            transaction_logger.warning(f"Skipping file due to missing data: {filename}, Symbol: {symbol}, Company: {companyname}, Date: {date}, Quarter: {quarter}, Year: {year}")
            bad_files += 1
            continue

        existing_call = Call.query.filter_by(
            symbol=symbol,
            companyname=companyname,
            date=date,
            quarter=quarter,
            year=year,
        ).first()

        if existing_call:
            num_skipped += 1
            continue

        new_call = Call(
            symbol=symbol,
            json_transcript=json_transcript,
            summary="",
            companyname=companyname,
            date=date,
            quarter=quarter,
            year=year,
        )

        for participant_data in participants:
            new_call.participants.append(
                Participant(
                    name=participant_data["name"],
                    role=participant_data["role"],
                )
            )

        db.session.add(new_call)
        transaction_logger.info(f"Processed Sucessfully: {filename}, Symbol: {symbol}, Company: {companyname}, Date: {date}")

        if symbol not in seen_symbols:
            seen_symbols.add(symbol)
            processed_symbols.append(symbol)

    db.session.commit()
    # When the loop stopped because the limit was exceeded, the counter is one
    # past the number of files actually examined.
    files_attempted_or_processed = num_processed - 1 if num_processed > number_to_process else num_processed
    flash(
        f"Attempted to process {files_attempted_or_processed} files. Skipped {num_skipped} existing files, found {bad_files} bad files.",
        "success")
    return processed_symbols


def _is_ticker_candidate(text):
    """Return True if *text* is upper-case, has no spaces and is 1-6 characters long."""
    return text.isupper() and ' ' not in text and 1 <= len(text) <= 6


def _is_year_token(token):
    """Return True for an all-digit token of length four, e.g. "2025"."""
    return token.isdigit() and len(token) == 4


def _is_quarter_token(token):
    """Return True for a 'Q' (any case) followed only by digits, e.g. "Q1"."""
    return token.upper().startswith('Q') and token[1:].isdigit()


def _parse_period(period_text):
    """Extract ``(quarter, year)`` from text like "2025 Q1 Earnings call transcript".

    Only the first two whitespace-separated tokens are inspected, in either
    "<year> <quarter>" or "<quarter> <year>" order. Whichever part cannot be
    recognised is returned as None.
    """
    quarter = None
    year = None
    parts = period_text.split()
    if not parts:
        return quarter, year

    first = parts[0]
    second = parts[1] if len(parts) > 1 else None

    if _is_year_token(first):
        year = first
        if second is not None and _is_quarter_token(second):
            quarter = second.upper()
    elif _is_quarter_token(first):
        quarter = first.upper()
        if second is not None and _is_year_token(second):
            year = second
    return quarter, year


def parse_transcript(file_path):
    """
    Parses an HTML transcript file to extract structured data.

    Args:
        file_path (str): The path to the HTML transcript file (read as UTF-8).

    Returns:
        tuple: (json_transcript, participants, symbol, companyname, date, quarter, year).
               json_transcript is a JSON string holding a list of
               {"speaker", "content"} objects, or None when no speech was found.
               participants is a list of {"name", "role"} dicts (possibly empty).
               date is whatever convert_date_to_iso returns for the first banner crumb.
               quarter is an upper-cased token such as "Q1"; year is a four-digit string.
               Returns (None, [], None, None, None, None, None) when the page title
               contains "Page Unavailable".
               Individual fields can be None if that part could not be parsed.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        soup = BeautifulSoup(file, "html.parser")

    # Check if the page is unavailable
    if soup.title and "Page Unavailable" in soup.title.text:
        return None, [], None, None, None, None, None

    # Transcript body: one entry per "speech" div that has both a speaker and content.
    transcript_data = []
    for element in soup.find_all("div", class_="speech"):
        speaker_tag = element.find('div', class_='participant-name')
        content_tag = element.find('div', class_='content')
        if speaker_tag and content_tag:
            transcript_data.append({
                "speaker": speaker_tag.text,
                "content": content_tag.text.strip(),
            })
    json_transcript = json.dumps(transcript_data) if transcript_data else None

    # Participants: rows of the participant table that have both a name and a role cell.
    participants = []
    participant_table = soup.find("table", class_="participant-list")
    if participant_table:
        for row in participant_table.find_all("tr"):
            name_tag = row.find("td", class_="name")
            role_tag = row.find("td", class_="role")
            if name_tag and role_tag:
                participants.append({
                    "name": name_tag.text.strip(),
                    "role": role_tag.text.strip(),
                })

    symbol = None
    companyname = None
    date = None
    quarter = None
    year = None

    # Symbol, method 1: <span class="tab-ticker">
    tab_ticker_span = soup.find("span", class_="tab-ticker")
    if tab_ticker_span:
        symbol = tab_ticker_span.text.strip()

    # Symbol, method 2: trailing "(SYMBOL)" in <h1 class="page-title"><span class="company-name">
    if not symbol:
        page_title_h1 = soup.find("h1", class_="page-title")
        if page_title_h1:
            company_name_span = page_title_h1.find("span", class_="company-name")
            if company_name_span:
                text = company_name_span.text.strip()  # Example: "Mondelez International (MDLZ)"
                if text.endswith(")") and '(' in text:
                    candidate = text[text.rfind('(') + 1:-1].strip()
                    if _is_ticker_candidate(candidate):
                        symbol = candidate

    # Symbol, method 3: leading part of <title> (e.g., "MDLZ - ... | ...")
    if not symbol and soup.title and soup.title.text:
        first_part_of_title = soup.title.text.split('|')[0].strip()
        candidate_from_title = first_part_of_title.split(' - ')[0].strip()
        if _is_ticker_candidate(candidate_from_title):
            symbol = candidate_from_title

    # Company name from <title>: the text after the first "|" if there is one,
    # otherwise the text after the first " - ".
    if soup.title and soup.title.text:
        title_parts = soup.title.text.split("|")
        if len(title_parts) > 1:
            companyname = title_parts[1].strip()
        elif " - " in title_parts[0]:
            potential_company_name = title_parts[0].split(" - ", 1)[1].strip()
            # Avoid taking a string that merely starts with the symbol again,
            # e.g. "MDLZ - MDLZ International".
            if not (symbol and potential_company_name.startswith(symbol)):
                companyname = potential_company_name

    # Date, quarter and year from the first two "banner-crumb" spans.
    banner_crumbs = soup.find_all("span", class_="banner-crumb")
    if len(banner_crumbs) >= 2:
        date = convert_date_to_iso(banner_crumbs[0].text.strip())
        quarter, year = _parse_period(banner_crumbs[1].text.strip())

    return json_transcript, participants, symbol, companyname, date, quarter, year


def check_favorites(db, processed_symbols):
    """Find users whose favorites include any of the processed symbols.

    Each user's ``favorites`` value is decoded with ``json.loads`` and is
    only considered when it decodes to a list. Users whose favorites cannot
    be decoded are logged and skipped. The most recent Call for a symbol
    (ordered by date, then id, both descending) is looked up at most once per
    symbol and reused for every user who has that symbol as a favorite.

    Args:
        db: Object exposing a SQLAlchemy ``session`` used for the queries.
        processed_symbols: A list of symbols that were processed.

    Returns:
        A list of tuples, where each tuple contains:
            - The user's ID
            - The user's username
            - The matching symbol
            - The ID of the Call object (most recent for that symbol if multiple exist)
        Symbols for which no Call exists produce no tuple.
    """
    matching_favorites = []
    latest_call_by_symbol = {}  # symbol -> most recent Call (or None), filled on demand

    for user in db.session.query(User).all():
        if not user.favorites:
            continue

        try:
            # Assuming user.favorites is a JSON string like '["AAPL", "MSFT"]'
            loaded_favs = json.loads(user.favorites)
        except json.JSONDecodeError:
            transaction_logger.error(f"Could not decode favorites for user {user.id}: {user.favorites}")
            continue  # Skip this user if favorites are malformed

        if not isinstance(loaded_favs, list):
            continue  # Any other JSON value is treated as "no favorites"

        for symbol_processed in processed_symbols:
            if symbol_processed not in loaded_favs:
                continue

            # The lookup depends only on the symbol, so query once and reuse
            # the result instead of re-querying for every matching user.
            if symbol_processed not in latest_call_by_symbol:
                latest_call_by_symbol[symbol_processed] = (
                    db.session.query(Call)
                    .filter_by(symbol=symbol_processed)
                    .order_by(Call.date.desc(), Call.id.desc())
                    .first()
                )

            call = latest_call_by_symbol[symbol_processed]
            if call:
                matching_favorites.append((user.id, user.username, symbol_processed, call.id))

    return matching_favorites
