forked from Archives/searxng
d14994dc73
settings.yml: * outgoing.networks: * can contain network definitions * properties: enable_http, verify, http2, max_connections, max_keepalive_connections, keepalive_expiry, local_addresses, support_ipv4, support_ipv6, proxies, max_redirects, retries * retries: 0 by default, number of times searx retries sending the HTTP request (using a different IP & proxy each time) * local_addresses can be "192.168.0.1/24" (it supports IPv6) * support_ipv4 & support_ipv6: both True by default; see https://github.com/searx/searx/pull/1034 * each engine can define a "network" section: * either a full network description * or a reference to an existing network * all HTTP requests of an engine use the same HTTP configuration (this was not the case before; see the proxy configuration in master)
110 lines
3.4 KiB
Python
110 lines
3.4 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""
|
|
Soundcloud (Music)
|
|
"""
|
|
|
|
import re
|
|
from json import loads
|
|
from lxml import html
|
|
from dateutil import parser
|
|
from urllib.parse import quote_plus, urlencode
|
|
from searx import logger
|
|
from searx.network import get as http_get
|
|
|
|
# Engine metadata: upstream website, Wikidata id, API documentation link,
# whether the official API is used / needs a key, and the result format.
about = {
    "website": "https://soundcloud.com",
    "wikidata_id": "Q568769",
    "official_api_documentation": "https://developers.soundcloud.com/",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}
|
|
|
|
# Engine-dependent configuration.
# The engine is listed under the "music" category.
categories = ["music"]
# Paging is enabled; request() reads params['pageno'] to compute the offset.
paging = True
|
|
|
|
# Search URL template.
# missing attribute: user_id, app_version, app_locale
url = 'https://api-v2.soundcloud.com/'
# Placeholders filled in by request(): {query}, {offset} and {client_id}.
search_url = (
    url
    + 'search?{query}'
    '&variant_ids='
    '&facet=model'
    '&limit=20'
    '&offset={offset}'
    '&linked_partitioning=1'
    '&client_id={client_id}'
)
|
|
|
|
# HTML snippet for the embedded player; {uri} receives the URL-quoted track
# or playlist URI.
embedded_url = (
    '<iframe width="100%" height="166" '
    'scrolling="no" frameborder="no" '
    'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
)
|
|
|
|
# Matches `client_id:"<value>"` (case-insensitively) and captures <value>.
cid_re = re.compile(r'client_id:"([^"]*)"', re.IGNORECASE | re.UNICODE)
# Guest client id used for API requests; empty until init() has filled it in.
guest_client_id = ''
|
|
|
|
|
|
def get_client_id():
    """Scrape a guest ``client_id`` from the SoundCloud web application.

    The landing page https://soundcloud.com is fetched, every ``<script>``
    whose ``src`` contains ``/assets/`` is collected, and those scripts are
    downloaded (last one first) until one of them contains a non-empty
    ``client_id:"..."`` assignment matched by ``cid_re``.

    Returns:
        The captured client id, or ``""`` when the landing page could not be
        fetched or no script contained a usable id (a warning is logged in
        that case).
    """
    # Local import: only this function needs urljoin.
    from urllib.parse import urljoin

    base_url = "https://soundcloud.com"
    landing = http_get(base_url)

    if landing.ok:
        tree = html.fromstring(landing.content)
        # script_tags has been moved from /assets/app/ to /assets/ path.  The
        # client_id was found in https://a-v2.sndcdn.com/assets/49-a0c01933-3.js
        script_tags = tree.xpath("//script[contains(@src, '/assets/')]")
        # The XPath predicate guarantees a ``src`` attribute.  urljoin leaves
        # absolute URLs untouched and resolves relative or protocol-relative
        # ones against the landing page.
        app_js_urls = [urljoin(base_url, tag.get('src')) for tag in script_tags]

        # The scripts are tried in reverse document order.
        for app_js_url in reversed(app_js_urls):
            script = http_get(app_js_url)
            if not script.ok:
                continue
            # A stray non-UTF-8 byte in a JS bundle must not abort the lookup.
            source = script.content.decode('utf-8', errors='replace')
            match = cid_re.search(source)
            # Skip empty captures (client_id:"") and keep looking.
            if match is not None and match.group(1):
                return match.group(1)

    logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
    return ""
|
|
|
|
|
|
def init(engine_settings=None):
    """Initialise the engine by fetching the guest client id.

    The value returned by ``get_client_id()`` is stored in the module-level
    ``guest_client_id``.  ``engine_settings`` is accepted but not used here.
    """
    global guest_client_id
    # api-key: the scraped guest id is what the search URL sends as client_id
    fetched_id = get_client_id()
    guest_client_id = fetched_id
|
|
|
|
|
|
# build the search request
def request(query, params):
    """Fill ``params['url']`` with the SoundCloud search URL for ``query``.

    The result offset is derived from ``params['pageno']`` (20 results per
    page, matching the ``limit=20`` in ``search_url``), and the module-level
    ``guest_client_id`` is sent as the ``client_id``.

    Returns:
        The same ``params`` dict, with ``'url'`` set.
    """
    page_index = params['pageno'] - 1
    encoded_query = urlencode({'q': query})

    params['url'] = search_url.format(
        query=encoded_query,
        offset=page_index * 20,
        client_id=guest_client_id,
    )

    return params
|
|
|
|
|
|
# get response from search-request
def response(resp):
    """Convert a SoundCloud search response into a list of result dicts.

    ``resp.text`` is parsed as JSON and its ``collection`` list is scanned.
    Only items whose ``kind`` is ``'track'`` or ``'playlist'`` are kept.

    Each result dict has the keys ``url``, ``title``, ``embedded`` and
    ``content``, plus ``publishedDate`` when ``last_modified`` is present and
    parseable.

    Items lacking ``permalink_url`` or ``uri`` are skipped, and a missing or
    malformed optional field no longer discards the whole result page.

    Returns:
        A list of result dicts (empty when nothing matched).
    """
    results = []

    search_res = loads(resp.text)

    # parse results
    for result in search_res.get('collection', []):
        if result.get('kind') not in ('track', 'playlist'):
            continue

        permalink_url = result.get('permalink_url')
        raw_uri = result.get('uri')
        # Without a link or a player URI there is nothing useful to show.
        if not permalink_url or not raw_uri:
            continue

        entry = {
            'url': permalink_url,
            'title': result.get('title') or '',
            'embedded': embedded_url.format(uri=quote_plus(raw_uri)),
            # description may be absent or null in the API response
            'content': result.get('description') or '',
        }

        last_modified = result.get('last_modified')
        if last_modified:
            try:
                entry['publishedDate'] = parser.parse(last_modified)
            except (TypeError, ValueError, OverflowError):
                # Unparseable timestamp: keep the result, omit the date.
                pass

        # append result
        results.append(entry)

    # return results
    return results
|