|
|
|
@ -272,7 +272,16 @@ def getImageFilenamesURL(config={}, start='!'):
|
|
|
|
|
raw = cleanHTML(raw)
|
|
|
|
|
#archiveteam <td class="TablePager_col_img_name"><a href="/index.php?title=File:Yahoovideo.jpg" title="File:Yahoovideo.jpg">Yahoovideo.jpg</a> (<a href="/images/2/2b/Yahoovideo.jpg">file</a>)</td>
|
|
|
|
|
#wikanda <td class="TablePager_col_img_user_text"><a href="/w/index.php?title=Usuario:Fernandocg&action=edit&redlink=1" class="new" title="Usuario:Fernandocg (página no existe)">Fernandocg</a></td>
|
|
|
|
|
m = re.compile(r'(?im)<td class="TablePager_col_img_name"><a href[^>]+title="[^:>]+:(?P<filename>[^>]+)">[^<]+</a>[^<]+<a href="(?P<url>[^>]+/[^>/]+)">[^<]+</a>[^<]+</td>\s*<td class="TablePager_col_img_user_text"><a[^>]+>(?P<uploader>[^<]+)</a></td>').finditer(raw)
|
|
|
|
|
r_images1 = r'(?im)<td class="TablePager_col_img_name"><a href[^>]+title="[^:>]+:(?P<filename>[^>]+)">[^<]+</a>[^<]+<a href="(?P<url>[^>]+/[^>/]+)">[^<]+</a>[^<]+</td>\s*<td class="TablePager_col_img_user_text"><a[^>]+>(?P<uploader>[^<]+)</a></td>'
|
|
|
|
|
#wikijuegos (old MediaWiki version): http://softwarelibre.uca.es/wikijuegos/Especial:Imagelist
|
|
|
|
|
r_images2 = r'(?im)<td class="TablePager_col_links"><a href[^>]+title="[^:>]+:(?P<filename>[^>]+)">[^<]+</a>[^<]+<a href="(?P<url>[^>]+/[^>/]+)">[^<]+</a></td>\s*<td class="TablePager_col_img_timestamp">[^<]+</td>\s*<td class="TablePager_col_img_name">[^<]+</td>\s*<td class="TablePager_col_img_user_text"><a[^>]+>(?P<uploader>[^<]+)</a></td>'
|
|
|
|
|
m = []
|
|
|
|
|
#different MediaWiki versions use different table markup for the image list; use the first pattern that matches
|
|
|
|
|
if re.search(r_images1, raw):
|
|
|
|
|
m = re.compile(r_images1).finditer(raw)
|
|
|
|
|
elif re.search(r_images2, raw):
|
|
|
|
|
m = re.compile(r_images2).finditer(raw)
|
|
|
|
|
|
|
|
|
|
for i in m:
|
|
|
|
|
url = i.group('url')
|
|
|
|
|
if url[0] == '/' or not url.startswith('http://'): #relative URL
|
|
|
|
|