o
    hxAhG                     @   s
  d dl Z d dlZd dlZd dlmZmZ d dlZd dlZd dlZd dl	m
Z
mZmZmZ e  eeZz
d dlmZmZ W n eyM   ed Y nw e
deddZd	ed
eee ee f fddZd	ed
eee ee f fddZejddgddd ZdS )    N)OptionalTuple)	Blueprintrequestjsonifycurrent_app)async_playwrightErrorzbERROR: Playwright is required. Install with: pip install playwright && playwright install chromiumtwitter_utilsz/twitter)
url_prefixusernamereturnc           "         s  t d|   d|  }t d|  t 4 I dH }d}zxzt d|  d |jjdg dd	I dH }t d
|du  |jdddddddddddI dH }t d | I dH }|dI dH  t d t d| d |j|dddI dH  t d| d d}d }d!}d"}	t	|D ]^}
t d#|
d$  d%| d& |
 I dH }|st d' |
|d$ k rt|I dH  q|}d(|v r+t d)|
d$  d* z/|d(}td+|d, }tt||td( d, }||| }t d)|
d$  d-| d W n+ ty* } zt d)|
d$  d.|  W Y d}~nd}~ww t d)|
d$  d/ d0}t||}t d)|
d$  d1|  |r|d$d2d3}|d4}t d5|
d$  d6| d7| d8 d}	||f  W W |rzt d9 | I dH  t d: W n ty } zt jd;| dd< W Y d}~nd}~ww t d=|  d> W  d  I dH  S d?}t||}t d)|
d$  d@|  |rh|d$}t d5|
d$  dA| dB dCt| dD}t||}|r|d$d2d3nd}|rt dE|  nt dF d}	||f  W W |rTzt d9 | I dH  t d: W n tyS } zt jd;| dd< W Y d}~nd}~ww t d=|  d> W  d  I dH  S g dG}| }|D ]d}||v rt d)|
d$  dH| dI d}	  W W |rzt d9 | I dH  t d: W n ty } zt jd;| dd< W Y d}~nd}~ww t d=|  d> W d  I dH  dJS qr|
|d$ k rt d)|
d$  dK t|I dH  qt dL|	  q|	st dM |sUt dN W W |rAzt d9 | I dH  t d: W n ty@ } zt jd;| dd< W Y d}~nd}~ww t d=|  d> W d  I dH  dJS tdOt|}t dP| dQ|d|   dR|v rvt dS nt dT d}d}dUdVdWdXdYdZd[}| D ]\}}d} |d\krt||}!|!rt d]|!dd^   |!d+ }t d_|  d|f  W W |rzt d9 | I dH  t d: W n ty } zt jd;| dd< W Y d}~nd}~ww t d=|  d> W  d  I dH  S nt||} | rst d`| da| d$  | d$}d|f  W W |r_zt d9 | I dH  t d: W n ty^ } zt jd;| dd< W Y d}~nd}~ww t d=|  d> W  d  I dH  S q|ry	 t db|  d> W W |rzt d9 | I dH  t d: W n ty } zt jd;| dd< W Y d}~nd}~ww t d=|  d> W d  I dH  dJS  ty3 } z[t jdc|  dd| dd< W Y d}~W |rzt d9 | I dH  t d: W n ty } zt jd;| dd< W Y d}~nd}~ww t d=|  d> W d  I dH  dJS d}~w ty } z[t jde|  dd| dd< W Y d}~W |rzt d9 | I dH  t d: W n ty } zt jd;| dd< W Y d}~nd}~ww t d=|  d> W d  I dH  dJS d}~ww |rzt d9 | I dH  t d: W n ty } zt jd;| dd< W Y d}~nd}~ww t d=|  d> w 1 I dH sw   Y  dS )fz
    Fetch a Twitter/X user profile, find the cover image URL, and extract the Twitter ID.
    This is the asynchronous version for use with Playwright.
    z4[TWITTER_UTILS] Attempting to get profile info for: zhttps://x.com/z[TWITTER_UTILS] Profile URL: Nz&[TWITTER_UTILS] Launching browser for z...T)	z--no-sandboxz--disable-setuid-sandboxz---disable-blink-features=AutomationControlledz--disable-web-securityz--disable-dev-shm-usagez--disable-gpuz--no-first-runz--no-default-browser-checkz--disable-default-apps)headlessargsz"[TWITTER_UTILS] Browser launched: i  i8  )widthheightzoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36zJtext/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8zen-US,en;q=0.5zgzip, deflatez
keep-alive1)AcceptzAccept-LanguagezAccept-Encoding
ConnectionzUpgrade-Insecure-Requests)Zviewport
user_agentZextra_http_headersz([TWITTER_UTILS] Browser context created.zEObject.defineProperty(navigator, 'webdriver', {get: () => undefined})z![TWITTER_UTILS] New page created.z[TWITTER_UTILS] Navigating to Znetworkidleiȯ  )Z
wait_untiltimeoutz[TWITTER_UTILS] Navigation to z completed using networkidle.   g      ? Fz [TWITTER_UTILS] Polling attempt    /z for content...z?[TWITTER_UTILS] No HTML content fetched during polling attempt.zpbs.twimg.com/profile_banners/z[TWITTER_UTILS] Poll z3: Substring 'pbs.twimg.com/profile_banners/' FOUND.r   d   z&: Context sample around substring: ...z : Error logging context sample: z7: Substring 'pbs.twimg.com/profile_banners/' NOT FOUND.zkbackground-image:\s*url\((?:&quot;|")?(https://pbs\.twimg\.com/profile_banners/(\d+)/[^)]+?)(?:&quot;|")?\)z%: Style_banner_pattern match result: z&amp;&   z [TWITTER_UTILS] SUCCESS in Poll z: Found ID 'z' and Cover URL 'z' via STYLE pattern.z"[TWITTER_UTILS] Closing browser...z[TWITTER_UTILS] Browser closed.zI[TWITTER_UTILS] Error closing browser in get_twitter_profile_info_async: exc_infoz;[TWITTER_UTILS] Exiting get_twitter_profile_info_async for .z1https://pbs\\.twimg\\.com/profile_banners/(\\d+)/z(: Original banner_pattern match result: z: Found Twitter ID 'z' via ORIGINAL banner pattern.z+(https://pbs\\.twimg\\.com/profile_banners/z/\\d+/\\d+x\\d+)z8[TWITTER_UTILS] Found cover image for original pattern: zL[TWITTER_UTILS] Cover image URL not found for original pattern (ID present).)zthis account doesn't existzhmm...this page doesn't existzpage not foundzaccount suspendedz": Profile error indicator found: ''NNz(: Primary conditions not met. Waiting...zG[TWITTER_UTILS] Max polling attempts for banner reached. found_in_poll=zu[TWITTER_UTILS] Primary banner/error not found during polling. Trying alternative patterns on last fetched content...zO[TWITTER_UTILS] HTML content is empty after polling loop, cannot try fallbacks.i  z1[TWITTER_UTILS] HTML sample for fallbacks (first z	 chars): Zprofile_bannerszH[TWITTER_UTILS] 'profile_banners' string IS in final HTML for fallbacks.zI[TWITTER_UTILS] 'profile_banners' string NOT in final HTML for fallbacks.z"rest_id":"(\d+)"zdata-user-id="(\d+)"z"id_str":"(\d+)"z"user_id":"(\d+)"zprofile_images/(\d+)/z"(\d{15,19})")Zrest_idzdata-user-idZid_strZuser_id_jsonZprofile_imageslong_numeric_idr#   z<[TWITTER_UTILS] Found potential Twitter IDs (15-19 digits):    zG[TWITTER_UTILS] Using first potential Twitter ID from long_numeric_id: z6[TWITTER_UTILS] Found match with alternative pattern 'z': z@[TWITTER_UTILS] All primary and alternative patterns failed for zG[TWITTER_UTILS] Playwright error in get_twitter_profile_info_async for : zG[TWITTER_UTILS] Unexpected error in get_twitter_profile_info_async for )loggerinfor   Zchromiumlaunchnew_contextZnew_pageZadd_init_scriptZgotorangecontentwarningasynciosleepfindmaxminlen	ExceptionresearchgroupreplacecloseerrorescapeloweritemsfindallPlaywrightError)"r   Zprofile_urlpbrowsercontextpageZmax_polling_attemptsZpolling_interval_secondsZhtml_contentZfound_in_pollattemptZcurrent_html_contentidx	start_idxZend_idxZcontext_sampleeZstyle_banner_patternZstyle_matchcover_image_url
twitter_idZbanner_patternmatchZfull_banner_url_patternZ
full_matchZerror_indicatorsZhtml_content_lowerZ	indicatorZsample_lengthZpatterns_to_trynamepattern_regexZcurrent_matchZpossible_ids rL   3/var/www/html/brandlife/blueprints/twitter_utils.pyget_twitter_profile_info_async   s&  





 &
"k
 ^

X
 q
J
 y

;
  

	

  )

  3	
  @
  C
  C
 rN   c                    s   t d  dg  fdd}tj|d}|  |  t d d d   t d tr@t d d	kr@ d S t 	d
 d d  d dS )Nz+[TWITTER_UTILS_THREAD] Starting thread for r"   c               
      s$  t d d zztt d< t d d d   W nE tyE }  zt jd d|  dd	 d
 d< W Y d } ~ n2d } ~ w tyg }  zt jd d|  dd	 d
 d< W Y d } ~ nd } ~ ww W t d  d S W t d  d S W t d  d S t d  w )Nz1[TWITTER_UTILS_THREAD] Thread target running for z (with nest_asyncio)r   zT[TWITTER_UTILS_THREAD] Async task completed via asyncio.run() with nest_asyncio for z
. Result: z9[TWITTER_UTILS_THREAD] RuntimeError in thread target for z (with nest_asyncio): Tr   r"   z>[TWITTER_UTILS_THREAD] General Exception in thread target for z2[TWITTER_UTILS_THREAD] Thread target finished for )r&   r'   r-   runrN   RuntimeErrorr9   r3   )rF   result_containerr   rL   rM   target   s&   &z#run_async_in_thread.<locals>.target)rS   z"[TWITTER_UTILS_THREAD] Thread for z& joined. Final result from container: r   r   z,[TWITTER_UTILS_THREAD] Result container for z+ did not contain a valid tuple. Contained: z. Returning (None,None).)
r&   r'   	threadingThreadstartjoin
isinstancetupler2   r,   )r   rS   threadrL   rQ   rM   run_async_in_thread   s   r[   z/get_id/<username>GET)methodsc              
   C   s  t d|   | st d tddidfS zBt| \}}t d|  d| d|  |rDt d	|  d
|  t| ||ddfW S t d|  d tdd|  didfW S  ty } zt jd|  d
| dd tdt|ddfW  Y d}~S d}~ww )z
    Flask endpoint to get Twitter ID and cover image URL.
    Uses a separate thread to run the async Playwright code to avoid event loop conflicts.
    z-[TWITTER_ROUTE] Received request for /get_id/z.[TWITTER_ROUTE] Username parameter is missing.r9   zUsername parameter is requiredi  z4[TWITTER_ROUTE] Result from run_async_in_thread for z: twitter_id=z, cover_url=z6[TWITTER_ROUTE] Successfully retrieved Twitter ID for r%   )r   rH   rG      z2[TWITTER_ROUTE] Failed to retrieve Twitter ID for z%. Playwright function returned no ID.z#Could not retrieve information for zB. Playwright function might have failed silently or found no data.i  z*[TWITTER_ROUTE] Error in get_id_route for Tr   z!An internal server error occurred)r9   detailsi  N)r&   r'   r,   r   r[   r3   r9   str)r   Z	cover_urlrH   rF   rL   rL   rM   get_id_route  s.   
"ra   )r-   r4   systypingr   r   rT   loggingZnest_asyncioflaskr   r   r   r   apply	getLogger__name__r&   Zplaywright.async_apir   r	   r>   ImportErrorr9   
twitter_bpr`   rN   r[   routera   rL   rL   rL   rM   <module>   s*    
" "Z