From b9f861d8c206bd39f1293f1c16c008a5c141b47b Mon Sep 17 00:00:00 2001 From: yzqzss Date: Sun, 2 Jul 2023 01:19:42 +0800 Subject: [PATCH] fix: the regex `r_images5` fails to handle non-English wikis and new MediaWiki versions. --- dumpgenerator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index c66b245..7bc9341 100755 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -1288,10 +1288,10 @@ def getImageNamesScraper(config={}, session=None): # (desc) 109 0923.JPG . . 885,713 bytes . . Bfalconer . . 18:44, 17 November 2005
r_images4 = r'(?im)]+ title="[^:>]+:(?P[^>]+)">[^<]+[^<]+[^<]+[^<]+]+>(?P[^<]+)' r_images5 = ( - r'(?im)\s*]*?>(?P[^>]+)\s*\([^<]*?\s*\)\s*\s*' + r'(?im)\s*]*?>(?P[^>]+)[^<]*?[^<]*?[^<]*?\s*' '[^\n\r]*?\s*' '[^<]*?\s*' - '\s*()?(?P[^<]+?)()?\s*') + '\s*()?(?P[^<]+?)()?\s*') # Select the regexp that returns more results regexps = [r_images1, r_images2, r_images3, r_images4, r_images5]