import asyncio
import re
import sys
from typing import Optional, Tuple
import threading # Import threading
import logging # Import logging
import nest_asyncio # Import nest_asyncio to handle gevent/asyncio conflicts

from flask import Blueprint, request, jsonify, current_app # Added current_app for potential logging

# Patch asyncio so that nested event loops are allowed. This runs once, at
# import time, before any of the async code defined below can be invoked.
nest_asyncio.apply()

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

try:
    from playwright.async_api import async_playwright, Error as PlaywrightError
except ImportError:
    # Playwright is missing: log an install hint and keep importing the module
    # instead of exiting (sys.exit(1) is not suitable for a web server).
    # NOTE(review): in this case `async_playwright` and `PlaywrightError` are
    # never bound, so code below that references them raises NameError when it
    # runs. No fallback is defined here.
    logger.error("ERROR: Playwright is required. Install with: pip install playwright && playwright install chromium")

# Blueprint that carries this module's routes under the /twitter URL prefix.
twitter_bp = Blueprint('twitter_utils', __name__, url_prefix='/twitter')

# Substring present in every profile-banner URL; used for a cheap pre-check and debug logging.
_BANNER_MARKER = "pbs.twimg.com/profile_banners/"

# Banner URL embedded in an inline style: background-image: url("...").
# Group 1 is the full URL, group 2 the numeric Twitter ID.
_STYLE_BANNER_RE = re.compile(
    r'background-image:\s*url\((?:&quot;|")?'
    r'(https://pbs\.twimg\.com/profile_banners/(\d+)/[^)]+?)'
    r'(?:&quot;|")?\)'
)

# Bare banner URL anywhere in the document. Group 1 is the numeric Twitter ID.
_BARE_BANNER_RE = re.compile(r'https://pbs\.twimg\.com/profile_banners/(\d+)/')

# Lower-case phrases that mean the profile cannot be scraped at all.
_PROFILE_ERROR_INDICATORS = (
    "this account doesn't exist",
    "hmm...this page doesn't exist",
    "page not found",
    "account suspended",
)

# Fallback ID patterns, tried in order. The last one is deliberately broad.
_FALLBACK_ID_PATTERNS = (
    ("rest_id", re.compile(r'"rest_id":"(\d+)"')),
    ("data-user-id", re.compile(r'data-user-id="(\d+)"')),
    ("id_str", re.compile(r'"id_str":"(\d+)"')),
    ("user_id_json", re.compile(r'"user_id":"(\d+)"')),  # Common in JSON contexts
    ("profile_images", re.compile(r'profile_images/(\d+)/')),
    ("long_numeric_id", re.compile(r'"(\d{15,19})"')),  # Any quoted 15-19 digit number
)

# Polling budget after navigation: 12 * 0.75s is roughly 9 seconds.
_MAX_POLLING_ATTEMPTS = 12
_POLLING_INTERVAL_SECONDS = 0.75


def _parse_style_banner(html: str) -> Optional[Tuple[str, str]]:
    """Return (cover_url, twitter_id) from a CSS background-image banner, or None."""
    match = _STYLE_BANNER_RE.search(html)
    if match is None:
        return None
    # The URL comes out of serialized HTML, so ampersands may still be entity-encoded.
    return match.group(1).replace("&amp;", "&"), match.group(2)


def _parse_bare_banner(html: str) -> Optional[Tuple[Optional[str], str]]:
    """Return (cover_url_or_None, twitter_id) from a bare banner URL, or None."""
    match = _BARE_BANNER_RE.search(html)
    if match is None:
        return None
    twitter_id = match.group(1)
    # Second pass: try to recover the complete .../<id>/<timestamp>/<W>x<H> URL.
    full_match = re.search(
        rf'(https://pbs\.twimg\.com/profile_banners/{re.escape(twitter_id)}/\d+/\d+x\d+)',
        html,
    )
    cover_image_url = full_match.group(1) if full_match else None
    return cover_image_url, twitter_id


def _find_profile_error(html: str) -> Optional[str]:
    """Return the first error indicator phrase found in the page, or None."""
    # Fold the typographic apostrophe to ASCII so "doesn\u2019t" matches the
    # indicators too (assumes the site may render either form - TODO confirm).
    haystack = html.lower().replace("\u2019", "'")
    for indicator in _PROFILE_ERROR_INDICATORS:
        if indicator in haystack:
            return indicator
    return None


def _find_fallback_id(html: str) -> Optional[Tuple[str, Tuple[str, ...]]]:
    """Return (pattern_name, candidate_ids) for the first fallback pattern that hits, or None."""
    for name, pattern in _FALLBACK_ID_PATTERNS:
        if name == "long_numeric_id":
            # Broad pattern: collect every candidate so the caller can log a few of them.
            candidates = tuple(pattern.findall(html))
        else:
            match = pattern.search(html)
            candidates = (match.group(1),) if match else ()
        if candidates:
            return name, candidates
    return None


async def get_twitter_profile_info_async(username: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Fetch a Twitter/X user profile, find the cover image URL, and extract the Twitter ID.
    This is the asynchronous version for use with Playwright.

    The profile page is opened in headless Chromium and its HTML is polled for a
    banner URL, first inside an inline ``background-image`` style and then as a
    bare URL. Polling stops early when an error indicator (missing or suspended
    account) is seen. If polling is exhausted, fallback ID patterns are tried on
    the last HTML that was fetched.

    Args:
        username: Profile handle, interpolated into ``https://x.com/{username}``.

    Returns:
        ``(cover_image_url, twitter_id)``. Either element may be None; both are
        None when nothing was found, an error indicator was seen, or an exception
        was raised inside the browser session (such exceptions are logged, not raised).
    """
    logger.info(f"[TWITTER_UTILS] Attempting to get profile info for: {username}")
    profile_url = f"https://x.com/{username}"
    logger.info(f"[TWITTER_UTILS] Profile URL: {profile_url}")

    async with async_playwright() as p:
        browser = None  # Stays None if launch fails, so the finally block can tell.
        try:
            logger.info(f"[TWITTER_UTILS] Launching browser for {username}...")
            browser = await p.chromium.launch(
                headless=True,
                args=[
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-blink-features=AutomationControlled',
                    '--disable-web-security',
                    '--disable-dev-shm-usage',
                    '--disable-gpu',
                    '--no-first-run',
                    '--no-default-browser-check',
                    '--disable-default-apps'
                ]
            )
            logger.info(f"[TWITTER_UTILS] Browser launched: {browser is not None}")

            context = await browser.new_context(
                viewport={"width": 1920, "height": 1080},
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                extra_http_headers={
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                    'Accept-Language': 'en-US,en;q=0.5',
                    'Accept-Encoding': 'gzip, deflate',
                    'Connection': 'keep-alive',
                    'Upgrade-Insecure-Requests': '1',
                }
            )
            logger.info("[TWITTER_UTILS] Browser context created.")

            page = await context.new_page()

            # Hide navigator.webdriver before any page script runs.
            await page.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

            logger.info("[TWITTER_UTILS] New page created.")

            logger.info(f"[TWITTER_UTILS] Navigating to {profile_url}...")
            # Keep a generous timeout for initial navigation, networkidle is important.
            await page.goto(profile_url, wait_until="networkidle", timeout=45000)
            logger.info(f"[TWITTER_UTILS] Navigation to {profile_url} completed using networkidle.")

            # Poll the rendered HTML; the banner may appear after networkidle.
            html_content = ""
            for attempt in range(_MAX_POLLING_ATTEMPTS):
                is_last_attempt = attempt == _MAX_POLLING_ATTEMPTS - 1
                logger.info(f"[TWITTER_UTILS] Polling attempt {attempt + 1}/{_MAX_POLLING_ATTEMPTS} for content...")
                current_html_content = await page.content()

                if not current_html_content:
                    logger.warning("[TWITTER_UTILS] No HTML content fetched during polling attempt.")
                    if not is_last_attempt:
                        await asyncio.sleep(_POLLING_INTERVAL_SECONDS)
                    continue

                html_content = current_html_content  # Keep the latest non-empty snapshot for fallbacks.

                # Debug aid: report whether the banner marker is present at all, with context.
                marker_idx = html_content.find(_BANNER_MARKER)
                if marker_idx != -1:
                    logger.info(f"[TWITTER_UTILS] Poll {attempt+1}: Substring 'pbs.twimg.com/profile_banners/' FOUND.")
                    start_idx = max(0, marker_idx - 100)
                    end_idx = min(len(html_content), marker_idx + len(_BANNER_MARKER) + 100)
                    context_sample = html_content[start_idx:end_idx]
                    logger.info(f"[TWITTER_UTILS] Poll {attempt+1}: Context sample around substring: ...{context_sample}...")
                else:
                    logger.info(f"[TWITTER_UTILS] Poll {attempt+1}: Substring 'pbs.twimg.com/profile_banners/' NOT FOUND.")

                # 1a. Banner inside a CSS background-image style (primary check).
                style_result = _parse_style_banner(html_content)
                logger.info(f"[TWITTER_UTILS] Poll {attempt+1}: Style_banner_pattern match result: {style_result}")
                if style_result is not None:
                    cover_image_url, twitter_id = style_result
                    logger.info(f"[TWITTER_UTILS] SUCCESS in Poll {attempt+1}: Found ID '{twitter_id}' and Cover URL '{cover_image_url}' via STYLE pattern.")
                    return cover_image_url, twitter_id

                # 1b. Bare banner URL anywhere in the document (secondary check).
                bare_result = _parse_bare_banner(html_content)
                logger.info(f"[TWITTER_UTILS] Poll {attempt+1}: Original banner_pattern match result: {bare_result}")
                if bare_result is not None:
                    cover_image_url, twitter_id = bare_result
                    logger.info(f"[TWITTER_UTILS] SUCCESS in Poll {attempt+1}: Found Twitter ID '{twitter_id}' via ORIGINAL banner pattern.")
                    if cover_image_url:
                        logger.info(f"[TWITTER_UTILS] Found cover image for original pattern: {cover_image_url}")
                    else:
                        logger.info("[TWITTER_UTILS] Cover image URL not found for original pattern (ID present).")
                    return cover_image_url, twitter_id

                # 2. Missing or suspended profile: a definitive failure, so stop polling.
                indicator = _find_profile_error(html_content)
                if indicator is not None:
                    logger.warning(f"[TWITTER_UTILS] Poll {attempt+1}: Profile error indicator found: '{indicator}'")
                    return None, None

                # 3. Nothing conclusive yet: wait and retry, or give up on polling.
                if not is_last_attempt:
                    logger.info(f"[TWITTER_UTILS] Poll {attempt+1}: Primary conditions not met. Waiting...")
                    await asyncio.sleep(_POLLING_INTERVAL_SECONDS)
                else:
                    logger.warning("[TWITTER_UTILS] Max polling attempts for banner reached. found_in_poll=False")

            # Reaching this point means no poll returned, so fall back to ID-only patterns.
            logger.info("[TWITTER_UTILS] Primary banner/error not found during polling. Trying alternative patterns on last fetched content...")

            if not html_content:
                logger.warning("[TWITTER_UTILS] HTML content is empty after polling loop, cannot try fallbacks.")
                return None, None

            # Log a sample of the HTML to help debug pattern misses.
            sample_length = min(1000, len(html_content))
            logger.info(f"[TWITTER_UTILS] HTML sample for fallbacks (first {sample_length} chars): {html_content[:sample_length]}")
            if "profile_banners" in html_content:
                logger.info("[TWITTER_UTILS] 'profile_banners' string IS in final HTML for fallbacks.")
            else:
                logger.info("[TWITTER_UTILS] 'profile_banners' string NOT in final HTML for fallbacks.")

            fallback = _find_fallback_id(html_content)
            if fallback is not None:
                name, candidates = fallback
                twitter_id = candidates[0]
                if name == "long_numeric_id":
                    logger.info(f"[TWITTER_UTILS] Found potential Twitter IDs (15-19 digits): {list(candidates[:5])}")
                    logger.info(f"[TWITTER_UTILS] Using first potential Twitter ID from long_numeric_id: {twitter_id}")
                else:
                    logger.info(f"[TWITTER_UTILS] Found match with alternative pattern '{name}': {twitter_id}")
                # Fallback patterns only yield an ID, never a cover image URL.
                return None, twitter_id

            logger.warning(f"[TWITTER_UTILS] All primary and alternative patterns failed for {username}.")
            return None, None

        except PlaywrightError as e:
            logger.error(f"[TWITTER_UTILS] Playwright error in get_twitter_profile_info_async for {username}: {e}", exc_info=True)
            return None, None
        except Exception as e:
            logger.error(f"[TWITTER_UTILS] Unexpected error in get_twitter_profile_info_async for {username}: {e}", exc_info=True)
            return None, None
        finally:
            if browser:
                try:
                    logger.info("[TWITTER_UTILS] Closing browser...")
                    await browser.close()
                    logger.info("[TWITTER_UTILS] Browser closed.")
                except Exception as e:
                    # Best-effort cleanup: a failed close must not mask the result.
                    logger.error(f"[TWITTER_UTILS] Error closing browser in get_twitter_profile_info_async: {e}", exc_info=True)
            logger.info(f"[TWITTER_UTILS] Exiting get_twitter_profile_info_async for {username}.")

def run_async_in_thread(username: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Run the async profile lookup to completion on a dedicated worker thread.

    The worker drives ``get_twitter_profile_info_async`` with ``asyncio.run()``
    and the caller blocks until the worker has finished. Any exception raised
    in the worker is logged and turned into ``(None, None)``.

    Args:
        username: Profile handle passed through to the async lookup.

    Returns:
        The ``(cover_image_url, twitter_id)`` pair produced by the lookup, or
        ``(None, None)`` on failure or if the worker left a malformed result.
    """
    logger.info(f"[TWITTER_UTILS_THREAD] Starting thread for {username}")
    outcome = (None, None)

    def target():
        nonlocal outcome
        logger.info(f"[TWITTER_UTILS_THREAD] Thread target running for {username} (with nest_asyncio)")
        try:
            # With nest_asyncio applied, asyncio.run() should work even in gevent-patched environments
            outcome = asyncio.run(get_twitter_profile_info_async(username))
            logger.info(f"[TWITTER_UTILS_THREAD] Async task completed via asyncio.run() with nest_asyncio for {username}. Result: {outcome}")
        except Exception as e:
            # RuntimeError gets its own log line; every failure resets the result.
            if isinstance(e, RuntimeError):
                logger.error(f"[TWITTER_UTILS_THREAD] RuntimeError in thread target for {username} (with nest_asyncio): {e}", exc_info=True)
            else:
                logger.error(f"[TWITTER_UTILS_THREAD] General Exception in thread target for {username} (with nest_asyncio): {e}", exc_info=True)
            outcome = (None, None)
        finally:
            logger.info(f"[TWITTER_UTILS_THREAD] Thread target finished for {username}")

    worker = threading.Thread(target=target)
    worker.start()
    worker.join()  # Block until the lookup has finished.

    logger.info(f"[TWITTER_UTILS_THREAD] Thread for {username} joined. Final result from container: {outcome}")
    is_valid_pair = isinstance(outcome, tuple) and len(outcome) == 2
    if not is_valid_pair:
        logger.warning(f"[TWITTER_UTILS_THREAD] Result container for {username} did not contain a valid tuple. Contained: {outcome}. Returning (None,None).")
        return (None, None)
    return outcome

@twitter_bp.route('/get_id/<username>', methods=['GET'])
def get_id_route(username):
    """
    Flask endpoint returning the Twitter ID and cover image URL for a username.

    The Playwright lookup runs through ``run_async_in_thread`` so the async code
    executes on its own thread, away from the request thread's event loop.

    Responses:
        200 with ``username``, ``twitter_id`` and ``cover_image_url`` when an ID is found.
        400 when the username is empty.
        404 when the lookup returned no ID.
        500 with the exception text under ``details`` on an unexpected error.
    """
    logger.info(f"[TWITTER_ROUTE] Received request for /get_id/{username}")
    if not username:
        logger.warning("[TWITTER_ROUTE] Username parameter is missing.")
        missing_payload = {"error": "Username parameter is required"}
        return jsonify(missing_payload), 400

    try:
        cover_url, twitter_id = run_async_in_thread(username)
        logger.info(f"[TWITTER_ROUTE] Result from run_async_in_thread for {username}: twitter_id={twitter_id}, cover_url={cover_url}")

        # Guard clause: no ID means the lookup failed or found nothing.
        if not twitter_id:
            logger.warning(f"[TWITTER_ROUTE] Failed to retrieve Twitter ID for {username}. Playwright function returned no ID.")
            not_found_payload = {"error": f"Could not retrieve information for {username}. Playwright function might have failed silently or found no data."}
            return jsonify(not_found_payload), 404

        logger.info(f"[TWITTER_ROUTE] Successfully retrieved Twitter ID for {username}: {twitter_id}")
        success_payload = {
            "username": username,
            "twitter_id": twitter_id,
            "cover_image_url": cover_url
        }
        return jsonify(success_payload), 200
    except Exception as e:
        logger.error(f"[TWITTER_ROUTE] Error in get_id_route for {username}: {e}", exc_info=True)
        failure_payload = {"error": "An internal server error occurred", "details": str(e)}
        return jsonify(failure_payload), 500

# If you want a main function for testing this script directly (optional)
# async def main_test(test_username="leagueoflegends"):
#     print(f"Attempting to fetch info for: {test_username}")
#     cover_url, twitter_id = await get_twitter_profile_info_async(test_username)
    
#     if cover_url and twitter_id:
#         print("\\nResults:")
#         print(f"Username: {test_username}")
#         print(f"Cover Image URL: {cover_url}")
#         print(f"Twitter ID: {twitter_id}")
#     else:
#         print(f"\\nCould not retrieve information for {test_username}.")

# if __name__ == "__main__":
#     # This part is for direct script execution testing, not for Flask app
#     # To test, you would run: python blueprints/twitter_utils.py <username>
#     test_user = "leagueoflegends"
#     if len(sys.argv) > 1:
#         test_user = sys.argv[1]
#     asyncio.run(main_test(test_user)) 