@ -142,9 +142,6 @@ search_url = base_url + '/sp/search'
# specific xpath variables
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
# not ads: div[@class="result"] are the direct children of div[@id="results"]
# Selects every result container; evaluated against the whole DOM when
# _response_cat_web iterates over the results.
results_xpath = ' //div[@class= " w-gl__result__main " ] '
# Evaluated relative to a single result: the title anchor(s). The first match
# is taken as the result link; results without a match are skipped.
link_xpath = ' .//a[@class= " w-gl__result-title result-link " ] '
# Evaluated relative to a single result: the description paragraph whose text
# becomes the result content (empty content when nothing matches).
content_xpath = ' .//p[@class= " w-gl__description " ] '
# Selects the search form element by its id.
search_form_xpath = ' //form[@id= " search " ] '
""" XPath of Startpage ' s origin search form
@ -334,8 +331,8 @@ def _response_cat_web(dom):
results = [ ]
# parse results
for result in eval_xpath ( dom , results_xpath ) :
links = eval_xpath ( result , link_xpath )
for result in eval_xpath ( dom , ' //div[@class= " w-gl " ]/div[contains(@class, " result " )] ' ) :
links = eval_xpath ( result , ' .//a[contains(@class, " result-title result-link " )] ' )
if not links :
continue
link = links [ 0 ]
@ -349,12 +346,9 @@ def _response_cat_web(dom):
if re . match ( r " ^http(s|)://(www \ .)?startpage \ .com/do/search \ ?.*$ " , url ) :
continue
title = extract_text ( link )
if eval_xpath ( result , content_xpath ) :
content : str = extract_text ( eval_xpath ( result , content_xpath ) ) # type: ignore
else :
content = ' '
title = extract_text ( eval_xpath ( link , ' h2 ' ) )
content = eval_xpath ( result , ' .//p[contains(@class, " description " )] ' )
content = extract_text ( content , allow_none = True ) or ' '
published_date = None