From c07b527e5dfc1992ddbbecd432fa959eaf5d3b5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20J=2E=20Rodr=C3=ADguez-Posada?= Date: Thu, 3 Jul 2014 19:33:09 +0200 Subject: [PATCH] adding session to getWikiEngine() --- dumpgenerator.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index b30295d..cd6a58e 100644 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -1223,20 +1223,17 @@ def avoidWikimediaProjects(config={}, other={}): def getWikiEngine(url=''): """ Returns the wiki engine of a URL, if known """ - req = urllib2.Request(url=url, headers={'User-Agent': getUserAgent(), 'Accept-Encoding': 'gzip'}) - f = urllib2.urlopen(req) - if f.headers.get('Content-Encoding') and 'gzip' in f.headers.get('Content-Encoding'): - raw = gzip.GzipFile(fileobj=StringIO.StringIO(f.read())).read() - else: - raw = f.read() - f.close() + session = requests.Session() + session.headers = {'User-Agent': getUserAgent()} + r = session.post(url=url) + result = r.text wikiengine = 'Unknown' - if re.search(ur'(?im)(