diff --git a/dumpgenerator.py b/dumpgenerator.py index 71e5588..2edceaa 100644 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -417,7 +417,7 @@ def getImageFilenamesURL(config={}): images = [] offset = '29990101000000' #january 1, 2999 while offset: - url = '%s?title=Special:Imagelist&limit=500&offset=%s' % (config['index'], offset) #5000 overload some servers + url = '%s?title=Special:Imagelist&limit=5000&offset=%s' % (config['index'], offset) #5000 overload some servers, but it is needed for sites like this with no next links http://www.memoryarchive.org/en/index.php?title=Special:Imagelist&sort=byname&limit=50&wpIlMatch= #print url raw = urllib.urlopen(url).read() raw = cleanHTML(raw) @@ -428,6 +428,9 @@ def getImageFilenamesURL(config={}): r_images2 = r'(?im)]+title="[^:>]+:(?P[^>]+)">[^<]+[^<]+[^<]+\s*[^<]+\s*[^<]+\s*]+>(?P[^<]+)' #gentoowiki 1.18 18:15, 3 April 2011Asus eeepc-1201nl.png (file)37 KBYannails 1 r_images3 = r'(?im)]+title="[^:>]+:(?P[^>]+)">[^<]+[^<]+[^<]+[^<]+]+>]+>[^<]+]+>(?P[^<]+)' + #http://www.memoryarchive.org/en/index.php?title=Special:Imagelist&sort=byname&limit=50&wpIlMatch= + #(desc) 109 0923.JPG . . 885,713 bytes . . Bfalconer . . 18:44, 17 November 2005
+ r_images4 = r'(?im)]+ title="[^:>]+:(?P[^>]+)">[^<]+[^<]+[^<]+[^<]+]+>(?P[^<]+)' m = [] #different mediawiki versions if re.search(r_images1, raw): @@ -436,6 +439,8 @@ def getImageFilenamesURL(config={}): m = re.compile(r_images2).finditer(raw) elif re.search(r_images3, raw): m = re.compile(r_images3).finditer(raw) + elif re.search(r_images4, raw): + m = re.compile(r_images4).finditer(raw) for i in m: url = i.group('url') diff --git a/uploadedwikis.txt b/uploadedwikis.txt index 378a04e..bee7a6b 100644 --- a/uploadedwikis.txt +++ b/uploadedwikis.txt @@ -80,3 +80,6 @@ http://wiki.frema.ecs.soton.ac.uk/api.php http://wikitravel.org/wiki/hu/api.php http://wiki.freeculture.org/api.php http://loprometidoesdeuda.com/api.php +http://es.tanatopedia.net/api.php +http://www.rezeptewiki.org/api.php +http://www.tarracowiki.cat/tarracowiki/api.php