Version 20240216.01. Use fixed minimum Wget version 1.21.3-at.20231213.03. Use TLSv1.2. Fix the svc comment content check.

master
arkiver 3 months ago
parent 48dc016faf
commit daab40aa6e

@@ -55,7 +55,7 @@ WGET_AT = find_executable(
'Wget+AT',
HigherVersion(
r'(GNU Wget 1\.[0-9]{2}\.[0-9]{1}-at\.[0-9]{8}\.[0-9]{2})[^0-9a-zA-Z\.-_]',
-'GNU Wget 1.21.3-at.20230623.01'
+'GNU Wget 1.21.3-at.20231213.03'
),
[
'./wget-at',
@@ -72,7 +72,7 @@ if not WGET_AT:
#
# Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker.
-VERSION = '20231201.01'
+VERSION = '20240216.01'
TRACKER_ID = 'reddit'
TRACKER_HOST = 'legacy-api.arpa.li'
MULTI_ITEM_SIZE = 100
@@ -281,7 +281,7 @@ class WgetArgs(object):
'--warc-compression-use-zstd',
'--warc-zstd-dict-no-include',
'--header', 'Accept-Language: en-US;q=0.9, en;q=0.8',
-'--secure-protocol', 'PFS',
+'--secure-protocol', 'TLSv1_2',
#'--ciphers', '+ECDHE-RSA:+AES-256-CBC:+SHA384'
]
dict_data = ZstdDict.get_dict()

@@ -739,6 +739,8 @@ wget.callbacks.write_to_warc = function(url, http_stat)
if (
string.match(url["url"], "^https?://[^/]+/svc/")
and string.match(html, 'level%s*=%s*"')
+and not string.match(html, '<shreddit%-async%-loader[^a-z]')
+and not string.match(html, '<shreddit%-comment%-tree[^a-z]')
) or (
string.match(url["url"], "^https?://old%.reddit%.com/r/")
and not (

Loading…
Cancel
Save