U
    “	mhö)  ã                   @   sL  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lm Z  ej! "d¡ ej! "d¡ d dl#m$Z$m%Z%m&Z& d dl'm(Z(m)Z) d dl*m+Z+ ej,ej-dd e .e/¡Z0G dd„ dƒZ1e/dkrHe1ƒ Z2e2 3¡  dS )é    N)ÚListÚDict)Ú	webdriver)ÚBy)ÚWebDriverWait)Úexpected_conditions)ÚService)ÚOptions)ÚChromeDriverManager)ÚGeckoDriverManagerz/appz/app/shared)ÚSearchEngineRequestÚSearchResponseÚSearchResult)ÚKafkaClientÚTOPICS)ÚRedisClientz4%(asctime)s - %(name)s - %(levelname)s - %(message)s)ÚlevelÚformatc                   @   sŠ   e Zd Zdd„ Zeeef dœdd„Zdd„ Zdd	„ Zd
d„ Z	de
ee dœdd„Zeedœdd„Zeeedœdd„Zdd„ Zdd„ ZdS )ÚGoogleSearchServicec                 C   s.   d| _ d| _tƒ | _tƒ | _d | _|  ¡  d S )NZgooglez"https://www.google.com/search?q={})Úsearch_engineÚ
search_urlr   Úkafka_clientr   Úredis_clientÚdriverÚsetup_driver©Úself© r   úservices/google-search/main.pyÚ__init__%   s    zGoogleSearchService.__init__)Úreturnc                 C   sô   i }t  ¡ }z„|dkr.tjddgdddd}n6|dkrNtjddgdddd}ntjd	dgdddd}|jd
krŽt d|j¡}|rŽ| d¡|d	< W n   Y nX zDtjddgdddd}|jd
kràt d|j¡}|rà| d¡|d< W n   Y nX |S )zDetect installed browsersZDarwinz</Applications/Google Chrome.app/Contents/MacOS/Google Chromez	--versionTé
   )Zcapture_outputÚtextZtimeoutZLinuxzgoogle-chromeÚchromer   z(\d+\.\d+\.\d+)é   Úfirefoxz
(\d+\.\d+))	ÚplatformÚsystemÚ
subprocessÚrunÚ
returncodeÚreÚsearchÚstdoutÚgroup)r   Zbrowsersr'   ÚresultZversion_matchr   r   r   Údetect_browsers-   sT     ÿ  þ
  ÿ
  ÿ

  ÿ
z#GoogleSearchService.detect_browsersc                 C   s8   |   ¡ }d|kr|  ¡  nd|kr,|  ¡  ntdƒ‚dS )z1Setup WebDriver with cross-platform compatibilityr#   r%   zNo supported browsers detectedN)r0   Úsetup_chrome_driverÚsetup_firefox_driverÚ	Exception)r   Zavailable_browsersr   r   r   r   S   s    

z GoogleSearchService.setup_driverc                 C   sž   t ƒ }| d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | ddg¡ | d	d
¡ | d¡ ttƒ  ¡ ƒ}tj||d| _| j 	d¡ t
 d¡ dS )zSetup Chrome WebDriverz--headless=newz--no-sandboxz--disable-dev-shm-usagez--disable-gpuz--window-size=1920,1080z---disable-blink-features=AutomationControlledZexcludeSwitcheszenable-automationZuseAutomationExtensionFz|--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36©ÚserviceÚoptionszEObject.defineProperty(navigator, 'webdriver', {get: () => undefined})zChrome WebDriver initializedN)ÚChromeOptionsÚadd_argumentZadd_experimental_optionÚChromeServicer
   Úinstallr   ZChromer   Zexecute_scriptÚloggerÚinfo©r   r6   r5   r   r   r   r1   ^   s    






z'GoogleSearchService.setup_chrome_driverc                 C   s\   t ƒ }| d¡ | d¡ | d¡ | dd¡ ttƒ  ¡ ƒ}tj||d| _t	 
d¡ dS )	zSetup Firefox WebDriverz
--headlessz--width=1920z--height=1080zgeneral.useragent.overridezPMozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0r4   zFirefox WebDriver initializedN)ÚFirefoxOptionsr8   Zset_preferenceÚFirefoxServicer   r:   r   ZFirefoxr   r;   r<   r=   r   r   r   r2   p   s    


ÿz(GoogleSearchService.setup_firefox_driverr!   )Úmax_resultsr    c                 C   sz  g }dddg}g }|D ] }| j  tj|¡}|r|} q8qt|d|… ƒD ]*\}}zÞt|d d}	z8| tjd¡}
|
r˜|
j|	_|
 tj	d¡}| 
d	¡|	_W n   Y W qHY nX d
ddg}|D ]6}z | tj|¡}|j|	_W  qôW q¼   Y q¼Y q¼X q¼z| tjd¡}|j|	_W n   Y nX |	jr.| |	¡ W qH tk
rr } z"t d|› d|› ¡ W Y ¢qHW 5 d}~X Y qHX qH|S )z"Extract search results from Googlezdiv.gz!div[data-sokoban-container] div.gz
div.tF2CxcNr$   )ZrankZh3z./..Zhrefz.VwiC3bz.s3v9rdz.stZcitezError extracting Google result z: )r   Zfind_elementsr   ZCSS_SELECTORÚ	enumerater   Zfind_elementr"   ÚtitleZXPATHZget_attributeZurlÚsnippetZdisplayed_urlÚappendr3   r;   Údebug)r   r@   ÚresultsZresult_selectorsZresult_elementsZselectorÚelementsÚiÚelementr/   Z
title_linkZlink_elementZsnippet_selectorsrC   Zcite_elementÚer   r   r   Úextract_google_results}   sR    ý

z*GoogleSearchService.extract_google_results)Úrequestr    c                 C   s  t   ¡ }z¢| j |j dd¡¡}t d|j› ¡ | j |¡ t  	|j
¡ |  |j¡}t   ¡ | }t|j| j|j|t|ƒ|d}t d|j› dt|ƒ› d|d›d	¡ |W S  tk
r
 } z@t d
|› ¡ t|j| j|jg dt|ƒt   ¡ | d W Y ¢S d}~X Y nX dS )zPerform Google searchú ú+zSearching Google for: )Ú
request_idr   ÚqueryrF   Útotal_resultsÚprocessing_timezGoogle search completed for 'z': z results in z.2fÚsz Error performing Google search: r   )rO   r   rP   rF   rQ   ÚerrorrR   N)Útimer   r   rP   Úreplacer;   r<   r   ÚgetÚsleepZdelayrK   r@   r   rO   r   Úlenr3   rT   Ústr)r   rL   Z
start_timer   rF   rR   ÚresponserJ   r   r   r   Úperform_search·   s:    ú	&
ùz"GoogleSearchService.perform_search©ÚtopicÚmessageÚkeyc              
   C   s   zXt f |Ž}t d|j› ¡ |  |¡}| jjtd | ¡ |jd t d|j› ¡ W n2 t	k
rŠ } zt 
d|› ¡ W 5 d}~X Y nX dS )zHandle incoming search requestsz"Processing Google search request: ZSEARCH_RESULTSr]   z)Google search response sent for request: z&Error handling Google search request: N)r   r;   r<   rO   r\   r   Zsend_messager   Údictr3   rT   )r   r^   r_   r`   rL   r[   rJ   r   r   r   Úhandle_search_requestá   s    

ýz)GoogleSearchService.handle_search_requestc              
   C   sŒ   t  d¡ zrz| jjtd gd| jd W nN tk
rH   t  d¡ Y n2 tk
rx } zt  	d|› ¡ W 5 d}~X Y nX W 5 |  ¡  X dS )z#Start consuming messages from Kafkaz!Starting Google Search Service...ZGOOGLE_SEARCH_REQUESTSzgoogle-search-service)ZtopicsZgroup_idZmessage_handlerz%Google Search Service stopped by userz Error in Google Search Service: N)
r;   r<   Úcleanupr   Zconsume_messagesr   rb   ÚKeyboardInterruptr3   rT   )r   rJ   r   r   r   Ústart_consumingö   s    
ý
&z#GoogleSearchService.start_consumingc                 C   s>   | j r| j  ¡  t d¡ | jr*| j ¡  | jr:| j ¡  dS )zClean up resourceszWebDriver closedN)r   Úquitr;   r<   r   Úcloser   r   r   r   r   rc     s    


zGoogleSearchService.cleanupN)r!   )Ú__name__Ú
__module__Ú__qualname__r   r   rZ   r0   r   r1   r2   Úintr   r   rK   r   r   r\   ra   rb   re   rc   r   r   r   r   r   $   s   &:*r   Ú__main__)4ÚosÚsysrU   Zloggingr&   r(   r+   Útypingr   r   Zseleniumr   Zselenium.webdriver.common.byr   Zselenium.webdriver.support.uir   Zselenium.webdriver.supportr   ZECZ!selenium.webdriver.chrome.servicer   r9   Z!selenium.webdriver.chrome.optionsr	   r7   Z"selenium.webdriver.firefox.servicer?   Z"selenium.webdriver.firefox.optionsr>   Zwebdriver_manager.chromer
   Zwebdriver_manager.firefoxr   ÚpathrD   Zshared.models.schemasr   r   r   Zshared.utils.kafka_utilsr   r   Zshared.utils.redis_utilsr   ZbasicConfigÚINFOZ	getLoggerrh   r;   r   r5   re   r   r   r   r   Ú<module>   s@   þ
 p
