@@ -14,7 +14,10 @@ from searx.data import WIKIDATA_UNITS
 from searx.network import post, get
 from searx.utils import match_language, searx_useragent, get_string_replaces_function
 from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
-from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
+from searx.engines.wikipedia import (
+    _fetch_supported_languages,
+    supported_languages_url,
+)  # NOQA # pylint: disable=unused-import
 
 # about
 about = {
@@ -112,10 +115,7 @@ replace_http_by_https = get_string_replaces_function({'http:': 'https:'})
 
 def get_headers():
     # user agent: https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits
-    return {
-        'Accept': 'application/sparql-results+json',
-        'User-Agent': searx_useragent()
-    }
+    return {'Accept': 'application/sparql-results+json', 'User-Agent': searx_useragent()}
 
 
 def get_label_for_entity(entity_id, language):
@@ -211,9 +211,9 @@ def get_results(attribute_result, attributes, language):
                     results.append({'title': infobox_title, 'url': url})
                     # update the infobox_id with the wikipedia URL
                     # first the local wikipedia URL, and as fallback the english wikipedia URL
-                    if attribute_type == WDArticle\
-                            and ((attribute.language == 'en' and infobox_id_lang is None)
-                                 or attribute.language != 'en'):
+                    if attribute_type == WDArticle and (
+                        (attribute.language == 'en' and infobox_id_lang is None) or attribute.language != 'en'
+                    ):
                         infobox_id_lang = attribute.language
                         infobox_id = url
             elif attribute_type == WDImageAttribute:
@@ -232,13 +232,11 @@ def get_results(attribute_result, attributes, language):
                 osm_zoom = area_to_osm_zoom(area) if area else 19
                 url = attribute.get_geo_url(attribute_result, osm_zoom=osm_zoom)
                 if url:
-                    infobox_urls.append({'title': attribute.get_label(language),
-                                         'url': url,
-                                         'entity': attribute.name})
+                    infobox_urls.append({'title': attribute.get_label(language), 'url': url, 'entity': attribute.name})
             else:
-                infobox_attributes.append({'label': attribute.get_label(language),
-                                           'value': value,
-                                           'entity': attribute.name})
+                infobox_attributes.append(
+                    {'label': attribute.get_label(language), 'value': value, 'entity': attribute.name}
+                )
 
     if infobox_id:
         infobox_id = replace_http_by_https(infobox_id)
@@ -246,22 +244,19 @@ def get_results(attribute_result, attributes, language):
     # add the wikidata URL at the end
     infobox_urls.append({'title': 'Wikidata', 'url': attribute_result['item']})
 
-    if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and\
-            len(infobox_content) == 0:
-        results.append({
-            'url': infobox_urls[0]['url'],
-            'title': infobox_title,
-            'content': infobox_content
-        })
+    if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and len(infobox_content) == 0:
+        results.append({'url': infobox_urls[0]['url'], 'title': infobox_title, 'content': infobox_content})
     else:
-        results.append({
+        results.append(
+            {
                 'infobox': infobox_title,
                 'id': infobox_id,
                 'content': infobox_content,
                 'img_src': img_src,
                 'urls': infobox_urls,
-                'attributes': infobox_attributes
-        })
+                'attributes': infobox_attributes,
+            }
+        )
     return results
 
 
@@ -271,13 +266,14 @@ def get_query(query, language):
     where = list(filter(lambda s: len(s) > 0, [a.get_where() for a in attributes]))
     wikibase_label = list(filter(lambda s: len(s) > 0, [a.get_wikibase_label() for a in attributes]))
     group_by = list(filter(lambda s: len(s) > 0, [a.get_group_by() for a in attributes]))
-    query = QUERY_TEMPLATE\
-        .replace('%QUERY%', sparql_string_escape(query))\
-        .replace('%SELECT%', ' '.join(select))\
-        .replace('%WHERE%', '\n  '.join(where))\
-        .replace('%WIKIBASE_LABELS%', '\n      '.join(wikibase_label))\
-        .replace('%GROUP_BY%', ' '.join(group_by))\
+    query = (
+        QUERY_TEMPLATE.replace('%QUERY%', sparql_string_escape(query))
+        .replace('%SELECT%', ' '.join(select))
+        .replace('%WHERE%', '\n  '.join(where))
+        .replace('%WIKIBASE_LABELS%', '\n      '.join(wikibase_label))
+        .replace('%GROUP_BY%', ' '.join(group_by))
         .replace('%LANGUAGE%', language)
+    )
     return query, attributes
 
 
@@ -303,28 +299,34 @@ def get_attributes(language):
         attributes.append(WDDateAttribute(name))
 
     # Dates
-    for p in ['P571',  # inception date
+    for p in [
+        'P571',  # inception date
         'P576',  # dissolution date
         'P580',  # start date
         'P582',  # end date
         'P569',  # date of birth
         'P570',  # date of death
         'P619',  # date of spacecraft launch
-        'P620']:  # date of spacecraft landing
+        'P620',
+    ]:  # date of spacecraft landing
         add_date(p)
 
-    for p in ['P27',  # country of citizenship
+    for p in [
+        'P27',  # country of citizenship
         'P495',  # country of origin
         'P17',  # country
-        'P159']:  # headquarters location
+        'P159',
+    ]:  # headquarters location
         add_label(p)
 
     # Places
-    for p in ['P36',  # capital
+    for p in [
+        'P36',  # capital
         'P35',  # head of state
         'P6',  # head of government
         'P122',  # basic form of government
-        'P37']:  # official language
+        'P37',
+    ]:  # official language
         add_label(p)
 
     add_value('P1082')  # population
@@ -334,7 +336,8 @@ def get_attributes(language):
     add_amount('P2048')  # heigth (building)
 
     # Media
-    for p in ['P400',  # platform (videogames, computing)
+    for p in [
+        'P400',  # platform (videogames, computing)
         'P50',  # author
         'P170',  # creator
         'P57',  # director
@@ -348,7 +351,8 @@ def get_attributes(language):
         'P123',  # publisher
         'P449',  # original network
         'P750',  # distributed by
-        'P86']:  # composer
+        'P86',
+    ]:  # composer
         add_label(p)
 
     add_date('P577')  # publication date
@@ -415,7 +419,7 @@ def get_attributes(language):
 
 class WDAttribute:
 
-    __slots__ = 'name',
+    __slots__ = ('name',)
 
     def __init__(self, name):
         self.name = name
@@ -443,14 +447,15 @@ class WDAttribute:
-
 
 
 class WDAmountAttribute(WDAttribute):
 
     def get_select(self):
         return '?{name} ?{name}Unit'.replace('{name}', self.name)
 
     def get_where(self):
         return """ OPTIONAL { ?item p:{name} ?{name}Node .
     ?{name}Node rdf:type wikibase:BestRank ; ps:{name} ?{name} .
-    OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace('{name}', self.name)
+    OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace(
+            '{name}', self.name
+        )
 
     def get_group_by(self):
         return self.get_select()
@@ -484,7 +489,9 @@ class WDArticle(WDAttribute):
         return """OPTIONAL { ?article{language} schema:about ?item ;
                  schema:inLanguage "{language}" ;
                  schema:isPartOf <https://{language}.wikipedia.org/> ;
-                 schema:name ?articleName{language} . }""".replace('{language}', self.language)
+                 schema:name ?articleName{language} . }""".replace(
+            '{language}', self.language
+        )
 
     def get_group_by(self):
         return self.get_select()
@@ -495,7 +502,6 @@ class WDArticle(WDAttribute):
-
 
 
 class WDLabelAttribute(WDAttribute):
 
     def get_select(self):
         return '(group_concat(distinct ?{name}Label;separator=", ") as ?{name}Labels)'.replace('{name}', self.name)
@@ -526,14 +532,13 @@ class WDURLAttribute(WDAttribute):
             value = value.split(',')[0]
             url_id = self.url_id
             if value.startswith(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):
-                value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):]
+                value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE) :]
                 url_id = 'wikimedia_image'
             return get_external_url(url_id, value)
         return value
 
-
 
 class WDGeoAttribute(WDAttribute):
 
     def get_label(self, language):
         return "OpenStreetMap"
@@ -543,7 +548,9 @@ class WDGeoAttribute(WDAttribute):
     def get_where(self):
         return """OPTIONAL { ?item p:{name}/psv:{name} [
                    wikibase:geoLatitude ?{name}Lat ;
-                   wikibase:geoLongitude ?{name}Long ] }""".replace('{name}', self.name)
+                   wikibase:geoLongitude ?{name}Long ] }""".replace(
+            '{name}', self.name
+        )
 
     def get_group_by(self):
         return self.get_select()
@@ -565,7 +572,7 @@ class WDGeoAttribute(WDAttribute):
 
 class WDImageAttribute(WDURLAttribute):
 
-    __slots__ = 'priority',
+    __slots__ = ('priority',)
 
     def __init__(self, name, url_id=None, priority=100):
         super().__init__(name, url_id)
@@ -573,7 +580,6 @@ class WDImageAttribute(WDURLAttribute):
-
 
 
 class WDDateAttribute(WDAttribute):
 
     def get_select(self):
         return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name)
@@ -587,7 +593,9 @@ class WDDateAttribute(WDAttribute):
     wikibase:timePrecision ?{name}timePrecision ;
     wikibase:timeTimezone ?{name}timeZone ;
     wikibase:timeCalendarModel ?{name}timeCalendar ] . }
-    hint:Prior hint:rangeSafe true;""".replace('{name}', self.name)
+    hint:Prior hint:rangeSafe true;""".replace(
+            '{name}', self.name
+        )
 
     def get_group_by(self):
         return self.get_select()
@@ -619,11 +627,12 @@ class WDDateAttribute(WDAttribute):
     def format_13(self, value, locale):
         timestamp = isoparse(value)
         # precision: minute
-        return get_datetime_format(format, locale=locale) \
-            .replace("'", "") \
-            .replace('{0}', format_time(timestamp, 'full', tzinfo=None,
-                                        locale=locale)) \
+        return (
+            get_datetime_format(format, locale=locale)
+            .replace("'", "")
+            .replace('{0}', format_time(timestamp, 'full', tzinfo=None, locale=locale))
             .replace('{1}', format_date(timestamp, 'short', locale=locale))
+        )
 
     def format_14(self, value, locale):
         # precision: second.
@@ -644,7 +653,7 @@ class WDDateAttribute(WDAttribute):
         '11': ('format_11', 0),  # day
         '12': ('format_13', 0),  # hour (not supported by babel, display minute)
         '13': ('format_13', 0),  # minute
-        '14': ('format_14', 0)  # second
+        '14': ('format_14', 0),  # second
     }
 
     def get_str(self, result, language):