From 7da27ab11036a75bb514378e3b62fc4c4488251f Mon Sep 17 00:00:00 2001 From: arkiver Date: Sat, 18 Nov 2023 16:25:31 +0100 Subject: [PATCH] Version 20231118.01. Switch to gnutls. --- Dockerfile | 2 +- pipeline.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index c3197f3..4d84e9f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1 +1 @@ -FROM atdr.meo.ws/archiveteam/grab-base +FROM atdr.meo.ws/archiveteam/grab-base:gnutls diff --git a/pipeline.py b/pipeline.py index eeee808..21072d5 100644 --- a/pipeline.py +++ b/pipeline.py @@ -59,7 +59,7 @@ WGET_AT = find_executable( ), [ './wget-at', - '/home/warrior/data/wget-at' + '/home/warrior/data/wget-at-gnutls' ] ) @@ -72,7 +72,7 @@ if not WGET_AT: # # Update this each time you make a non-cosmetic change. # It will be added to the WARC files and reported to the tracker. -VERSION = '20231115.01' +VERSION = '20231118.01' TRACKER_ID = 'reddit' TRACKER_HOST = 'legacy-api.arpa.li' MULTI_ITEM_SIZE = 100 @@ -280,8 +280,7 @@ class WgetArgs(object): '--warc-dedup-url-agnostic', '--warc-compression-use-zstd', '--warc-zstd-dict-no-include', - '--header', 'Accept-Language: en-US;q=0.9, en;q=0.8', - '--ciphers', 'HIGH:+CHACHA20' + '--header', 'Accept-Language: en-US;q=0.9, en;q=0.8' ] dict_data = ZstdDict.get_dict() with open(os.path.join(item['item_dir'], 'zstdict'), 'wb') as f: