From a98dae78f1279397797c98f968bec2d94a2035bc Mon Sep 17 00:00:00 2001 From: emijrp Date: Fri, 8 Apr 2011 20:35:05 +0000 Subject: [PATCH] image regexp for old wikis (wiki libsdl, wikijuegos) git-svn-id: https://wikiteam.googlecode.com/svn/trunk@39 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 --- dumpgenerator.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index 5671559..3aacf33 100644 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -272,7 +272,16 @@ def getImageFilenamesURL(config={}, start='!'): raw = cleanHTML(raw) #archiveteam Yahoovideo.jpg (file) #wikanda Fernandocg - m = re.compile(r'(?im)]+title="[^:>]+:(?P[^>]+)">[^<]+[^<]+[^<]+[^<]+\s*]+>(?P[^<]+)').finditer(raw) + r_images1 = r'(?im)]+title="[^:>]+:(?P[^>]+)">[^<]+[^<]+[^<]+[^<]+\s*]+>(?P[^<]+)' + #wikijuegos http://softwarelibre.uca.es/wikijuegos/Especial:Imagelist old mediawiki version + r_images2 = r'(?im)]+title="[^:>]+:(?P[^>]+)">[^<]+[^<]+[^<]+\s*[^<]+\s*[^<]+\s*]+>(?P[^<]+)' + m = [] + #different mediawiki versions + if re.search(r_images1, raw): + m = re.compile(r_images1).finditer(raw) + elif re.search(r_images2, raw): + m = re.compile(r_images2).finditer(raw) + for i in m: url = i.group('url') if url[0] == '/' or not url.startswith('http://'): #relative URL