diff --git a/dumpgenerator.py b/dumpgenerator.py
index c62070d..fbb10ad 100755
--- a/dumpgenerator.py
+++ b/dumpgenerator.py
@@ -287,9 +287,9 @@ def getPageTitlesScraper(config={}, session=None):
r_title = r'title="(?P
[^>]+)">'
r_suballpages = ''
- r_suballpages1 = r'&from=(?P[^>]+)&to=(?P[^>]+)">'
- r_suballpages2 = r'Special:Allpages/(?P[^>]+)">'
- r_suballpages3 = r'&from=(?P[^>]+)" title="[^>]+">'
+ r_suballpages1 = r'&from=(?P[^>"]+)&to=(?P[^>"]+)">'
+ r_suballpages2 = r'Special:Allpages/(?P[^>"]+)">'
+ r_suballpages3 = r'&from=(?P[^>"]+)" title="[^>]+">'
if re.search(r_suballpages1, raw):
r_suballpages = r_suballpages1
elif re.search(r_suballpages2, raw):
@@ -299,7 +299,7 @@ def getPageTitlesScraper(config={}, session=None):
else:
pass # perhaps no subpages
- # Should be enought subpages on Special:Allpages
+ # Should be enough subpages on Special:Allpages
deep = 50
c = 0
oldfr = ''
@@ -321,8 +321,8 @@ def getPageTitlesScraper(config={}, session=None):
name = '%s-%s' % (fr, to)
url = '%s?title=Special:Allpages&namespace=%s&from=%s&to=%s' % (
config['index'], namespace, fr, to) # do not put urllib.quote in fr or to
- # fix, esta regexp no carga bien todas? o falla el r_title en
- # este tipo de subpag? (wikiindex)
+ # FIXME: does this regexp fail to load all of them? Or does
+ # r_title fail on this type of subpage? (wikiindex)
elif r_suballpages == r_suballpages2:
# clean &namespace=\d, sometimes happens
fr = fr.split('&namespace=')[0]