 @using-api   no (TODO, rewrite to api)
 @results     HTML
 @stable      no (HTML can change)
 @parse       url, title, img_src

 @todo        rewrite to api
"""
# pylint: disable=missing-function-docstring
from lxml import html
import re
from urllib . parse import urlencode
from urllib . parse import urlencode
from lxml import html
# engine dependent config
categories = ['images']
paging = True
time_range_support = True

# map searx time-range filter values to deviantart's "order" query parameter
time_range_dict = {
    'day': 'popular-24-hours',
    'week': 'popular-1-week',
    'month': 'popular-1-month',
    'year': 'most-recent',
}

# search-url (no trailing slash; request() appends the /search/deviations path)
base_url = 'https://www.deviantart.com'
# do search-request
def request(query, params):
    """Build the search request URL.

    Fills ``params['url']`` with a URL of the form
    https://www.deviantart.com/search/deviations?page=5&q=foo
    adding an ``order=...`` argument when a supported time range is selected.
    Returns the (mutated) ``params`` dict, as searx engines are expected to.
    """
    args = {
        'page': params['pageno'],
        'q': query,
    }
    # an unsupported time range simply produces an un-ordered query
    if params['time_range'] in time_range_dict:
        args['order'] = time_range_dict[params['time_range']]

    params['url'] = base_url + '/search/deviations?' + urlencode(args)

    return params
# get response from search-request
def response(resp):
    """Parse image results out of the deviantart HTML page.

    Returns a list of result dicts with ``template``, ``url``, ``img_src``
    and ``title`` keys; an empty list on a redirect (302) response.
    """
    results = []

    # return empty array if a redirection code is returned
    if resp.status_code == 302:
        return results

    dom = html.fromstring(resp.text)

    # parse results
    for row in dom.xpath('//div[contains(@data-hook, "content_row")]'):
        for result in row.xpath('./div'):
            a_tag = result.xpath('.//a[@data-hook="deviation_link"]')[0]
            # lazily-loaded images keep the real <img> inside a <noscript>
            noscript_tag = a_tag.xpath('.//noscript')

            if noscript_tag:
                img_tag = noscript_tag[0].xpath('.//img')
            else:
                img_tag = a_tag.xpath('.//img')

            # skip text-only deviations that carry no image
            if not img_tag:
                continue
            img_tag = img_tag[0]

            results.append({
                'template': 'images.html',
                'url': a_tag.attrib.get('href'),
                'img_src': img_tag.attrib.get('src'),
                'title': img_tag.attrib.get('alt'),
            })

    return results