From 43945c467f5912c39bef43e43154fc0b92b51929 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sun, 11 Sep 2022 16:41:17 -0700 Subject: [PATCH] Work around unicode titles not working with resuming Before, you would get UnicodeWarning: Unicode unequal comparison failed to convert both arguments to Unicode - interpreting them as being unequal. The %s versus {} change was needed because otherwise you would get UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-5: ordinal not in range(128). There is probably a better way of solving that, but this one does work. --- dumpgenerator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index bd27ff1..7c671ef 100755 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -730,7 +730,7 @@ def generateXMLDump(config={}, titles=[], start=None, session=None): if config['xmlrevisions']: if start: - print("WARNING: will try to start the download from title: {}".format(start)) + print("WARNING: will try to start the download from title: %s" % start) xmlfile = open('%s/%s' % (config['path'], xmlfilename), 'a') else: print 'Retrieving the XML for every page from the beginning' @@ -1160,7 +1160,7 @@ def reverse_readline(filename, buf_size=8192, truncate=False): fh.truncate raise StopIteration else: - yield segment + yield segment.decode('utf-8') segment = lines[0] for index in range(len(lines) - 1, 0, -1): if truncate and '' in segment: @@ -1169,8 +1169,8 @@ def reverse_readline(filename, buf_size=8192, truncate=False): fh.truncate raise StopIteration else: - yield lines[index] - yield segment + yield lines[index].decode('utf-8') + yield segment.decode('utf-8') def saveImageNames(config={}, images=[], session=None): """ Save image list in a file, including filename, url and uploader """