diff --git a/wikiadownloader.py b/wikiadownloader.py index f719abd..a8b451f 100644 --- a/wikiadownloader.py +++ b/wikiadownloader.py @@ -42,40 +42,41 @@ f = open('wikia.com', 'r') wikia = f.read().strip().split('\n') f.close() -print len(wikia), 'wikis in Wikia' +print >>sys.stderr, len(wikia), 'wikis in Wikia' start = '!' if len(sys.argv) > 1: start = sys.argv[1] for wiki in wikia: + wiki = wiki.lower() prefix = wiki.split('http://')[1] if prefix < start: continue - print wiki - path = '%s/%s/%s' % (prefix[0], prefix[0:2], prefix) + print >>sys.stderr, "Starting:", wiki f = urllib.urlopen('%s/wiki/Special:Statistics' % (wiki)) html = f.read() - #print html f.close() - m = re.compile(r'(?i)(?P\d\d:\d\d), (?P[a-z]+) (?P\d+), (?P\d+)').finditer(html) - for i in m: + m = re.compile(r'(?i)(?P\d{4})-(?P\d{2})-(?P\d{2}) (?P