lint: fix all linting warnings

pull/475/head
Misty 1 year ago
parent b04e6840b0
commit 231dbcf1df

@@ -14,6 +14,7 @@ def checkAPI(api="", session: requests.Session=None):
"""Checking API availability"""
global cj
# handle redirects
r: Optional[requests.Response] = None
for i in range(4):
print("Checking API...", api)
r = session.get(
@@ -21,11 +22,12 @@ def checkAPI(api="", session: requests.Session=None):
params={"action": "query", "meta": "siteinfo", "format": "json"},
timeout=30,
)
if i >= 4:
break
if r.status_code == 200:
break
elif r.status_code < 400:
p = r.url
api = urlunparse([p.scheme, p.netloc, p.path, "", "", ""])
api = r.url
elif r.status_code > 400:
print(
"MediaWiki API URL not found or giving error: HTTP %d" % r.status_code

@@ -1,6 +1,7 @@
import argparse
import datetime
import http
import http.cookiejar
import os
import re
import sys
@@ -13,7 +14,7 @@ from wikiteam3.dumpgenerator.api.index_check import checkIndex
from wikiteam3.utils import getUserAgent
from wikiteam3.dumpgenerator.version import getVersion
from wikiteam3.dumpgenerator.api import getWikiEngine
from wikiteam3.dumpgenerator.config import Config, DefaultConfig, newConfig
from wikiteam3.dumpgenerator.config import Config, newConfig
from typing import *
@@ -29,10 +30,10 @@ def getParameters(params=None) -> Tuple[Config, Dict]:
"--cookies", metavar="cookies.txt", help="path to a cookies.txt file"
)
parser.add_argument(
"--delay", metavar=5, default=0.5, type=float, help="adds a delay (in seconds)"
"--delay", metavar="5", default=0.5, type=float, help="adds a delay (in seconds)"
)
parser.add_argument(
"--retries", metavar=5, default=5, help="Maximum number of retries for "
"--retries", metavar="5", default=5, help="Maximum number of retries for "
)
parser.add_argument("--path", help="path to store wiki dump at")
parser.add_argument(
@@ -186,6 +187,7 @@ def getParameters(params=None) -> Tuple[Config, Dict]:
# print (index)
index2 = None
check, checkedapi = False, None
if api:
check, checkedapi = checkRetryAPI(
api=api,
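
The metavar changes above fix a typing warning: argparse's type stubs expect metavar to be a string, since it is only used to render the placeholder in help output, so metavar=5 becomes metavar="5" while default and type keep controlling the parsed value. A standalone sketch (not the project's parser):

import argparse

parser = argparse.ArgumentParser(description="metavar sketch")
# metavar only affects how the option's placeholder is printed in --help;
# it should be a str, while default/type determine the actual value.
parser.add_argument("--delay", metavar="5", default=0.5, type=float,
                    help="adds a delay (in seconds)")
parser.add_argument("--retries", metavar="5", default=5, type=int,
                    help="maximum number of retries")
args = parser.parse_args(["--delay", "1.5"])
print(args.delay, args.retries)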

@@ -3,7 +3,7 @@ import threading
import time
import sys
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
class Delay:

@@ -62,7 +62,6 @@ class Config:
templates: bool = False
DefaultConfig = Config()
def newConfig(configDict) -> Config:
return _dataclass_from_dict(Config, configDict)
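
Context for the hunk above: the module-level DefaultConfig = Config() singleton is removed, which is why the DefaultConfig imports disappear throughout this commit, and configs are built from plain dicts via _dataclass_from_dict. A rough sketch of how such a helper can work, assuming it simply filters the dict down to the dataclass fields (an assumption, not the project's implementation):

from dataclasses import dataclass, fields

@dataclass
class Config:                       # trimmed illustration, not the full wikiteam3 Config
    api: str = ""
    index: str = ""
    delay: float = 0.5
    retries: int = 5

def dataclass_from_dict(klass, d):
    # Keep only keys that are actual dataclass fields, then build the instance;
    # keys that are absent fall back to the field defaults.
    known = {f.name for f in fields(klass)}
    return klass(**{k: v for k, v in d.items() if k in known})

config = dataclass_from_dict(Config, {"api": "http://example.org/w/api.php", "delay": 1.0})
print(config.api, config.delay, config.retries)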

@@ -23,7 +23,7 @@ except ImportError:
from typing import *
from wikiteam3.dumpgenerator.config import loadConfig, saveConfig
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
from wikiteam3.dumpgenerator.cli import getParameters, bye, welcome
from wikiteam3.utils import domain2prefix
from wikiteam3.utils import truncateFilename
@@ -32,8 +32,8 @@ from wikiteam3.utils import avoidWikimediaProjects
from .page.image import Image
from .misc.index_php import saveIndexPHP
from .misc.logs import saveLogs
from .misc.page_special_version import saveSpecialVersion
from .misc.special_logs import saveLogs
from .misc.special_version import saveSpecialVersion
from .page.page_titles import getPageTitles, readTitles
from .misc.site_info import saveSiteInfo
from .xmlrev.xml_dump import generateXMLDump

@@ -2,7 +2,7 @@ import os
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.utils import removeIP
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
def saveIndexPHP(config: Config=None, session=None):
"""Save index.php as .html, to preserve license details available at the botom of the page"""
@@ -11,7 +11,7 @@ def saveIndexPHP(config: Config=None, session=None):
print("index.html exists, do not overwrite")
else:
print("Downloading index.php (Main Page) as index.html")
r = session.post(url=config.index, params: Dict=None, timeout=10)
r = session.post(url=config.index, params=None, timeout=10)
raw = str(r.text)
Delay(config=config, session=session)
raw = removeIP(raw=raw)
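
The saveIndexPHP change above fixes what was actually invalid syntax: params: Dict=None is an annotated-parameter form and cannot appear inside a function call, so the call site just passes params=None. A small sketch with a hypothetical URL, assuming the annotation is wanted on a variable instead:

import requests
from typing import Dict, Optional

session = requests.Session()
index_url = "http://example.org/w/index.php"   # hypothetical URL, for illustration only
params: Optional[Dict] = None                  # annotate the variable, not the keyword argument
r = session.post(url=index_url, params=params, timeout=10)
print(r.status_code)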

@@ -3,7 +3,7 @@ import os
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.dumpgenerator.api import getJSON
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
def saveSiteInfo(config: Config=None, session=None):

@@ -1,4 +1,4 @@
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
from wikiteam3.dumpgenerator.cli import Delay
def saveLogs(config: Config=None, session=None):

@@ -2,7 +2,7 @@ import os
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.utils import removeIP
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
def saveSpecialVersion(config: Config=None, session=None):

@@ -13,7 +13,7 @@ from wikiteam3.dumpgenerator.log import logerror
from .page_xml import getXMLPage
from wikiteam3.utils import truncateFilename
from wikiteam3.utils import cleanHTML, undoHTMLEntities
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
class Image:
def getXMLFileDesc(config: Config=None, title="", session=None):

@@ -7,7 +7,7 @@ import mwclient
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.dumpgenerator.dump.xmlrev.namespaces import getNamespacesAPI, getNamespacesScraper
from wikiteam3.utils import domain2prefix, cleanHTML, undoHTMLEntities
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
def getPageTitlesAPI(config: Config=None, session=None):
@@ -78,6 +78,7 @@ def getPageTitlesScraper(config: Config=None, session=None):
while r_suballpages and re.search(r_suballpages, raw) and c < deep:
# load sub-Allpages
m = re.compile(r_suballpages).finditer(raw)
currfr = None
for i in m:
fr = i.group("from")
currfr = fr
@@ -114,6 +115,8 @@ def getPageTitlesScraper(config: Config=None, session=None):
name,
namespace,
)
else:
assert False, "Unreachable"
if name not in checked_suballpages:
# to avoid reload dupe subpages links
@@ -136,6 +139,8 @@ def getPageTitlesScraper(config: Config=None, session=None):
)
Delay(config=config, session=session)
assert currfr is not None, "re.search found the pattern, but re.finditer fails, why?"
oldfr = currfr
c += 1
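
The currfr = None initialisation and the later assert currfr is not None above are the usual way to silence a "possibly unbound variable" warning when a name is only assigned inside a loop whose body might not run. A generic sketch of the pattern (illustrative names and data only):

import re

raw = "from=Alpha from=Beta"           # stand-in for the scraped Special:Allpages HTML
currfr = None                          # bind the name before the loop so it always exists
for match in re.finditer(r"from=(?P<from>\w+)", raw):
    currfr = match.group("from")
# If the surrounding code already proved a match exists, the assert documents
# that invariant and narrows the type for linters and type checkers.
assert currfr is not None, "no from= link found in raw"
print(currfr)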

@@ -1,3 +1,4 @@
from typing import *
import re
import sys
import time
@@ -10,7 +11,7 @@ from wikiteam3.dumpgenerator.exceptions import ExportAbortedError, PageMissingEr
from wikiteam3.dumpgenerator.api import handleStatusCode
from wikiteam3.dumpgenerator.log import logerror
from wikiteam3.utils import uprint
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
def getXMLPageCore(headers: Dict=None, params: Dict=None, config: Config=None, session=None) -> str:

@@ -2,7 +2,7 @@ import re
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.dumpgenerator.api import getJSON
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
def getNamespacesScraper(config: Config=None, session=None):
"""Hackishly gets the list of namespaces names and ids from the dropdown in the HTML of Special:AllPages"""

@@ -8,7 +8,7 @@ from wikiteam3.dumpgenerator.exceptions import PageMissingError
from wikiteam3.dumpgenerator.log import logerror
from wikiteam3.dumpgenerator.dump.page.page_titles import readTitles
from wikiteam3.dumpgenerator.dump.page.page_xml import getXMLPage
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
from wikiteam3.utils import cleanXML, undoHTMLEntities
from .xml_header import getXMLHeader
from .xml_revisions import getXMLRevisions

@@ -8,9 +8,9 @@ import requests
from wikiteam3.dumpgenerator.exceptions import ExportAbortedError, PageMissingError
from wikiteam3.dumpgenerator.log import logerror
from wikiteam3.dumpgenerator.dump.page.page_xml import getXMLPage
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
def getXMLHeader(config: Config=None, session=None) -> Tuple[str, dict]:
def getXMLHeader(config: Config=None, session=None) -> Tuple[str, Config]:
"""Retrieve a random page to extract XML headers (namespace info, etc)"""
# get the header of a random page, to attach it in the complete XML backup
# similar to: <mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/"

@@ -1,6 +1,7 @@
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from typing import *
from wikiteam3.dumpgenerator.config import Config
def checkXMLIntegrity(config: Config=None, titles=[], session=None):
def checkXMLIntegrity(config: Config=None, titles: Iterable[str]=None, session=None):
"""Check XML dump integrity, to detect broken XML chunks"""
return
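
The checkXMLIntegrity signature change above replaces the mutable default titles=[] with titles: Iterable[str]=None; default values are evaluated once at definition time, so a shared list can leak state between calls. A minimal illustration of the pitfall and the usual fix (hypothetical functions, not project code):

from typing import Iterable, List, Optional

def collect_bad(title: str, seen: List[str] = []) -> List[str]:
    seen.append(title)          # the same list object is reused on every call
    return seen

def collect_good(title: str, seen: Optional[Iterable[str]] = None) -> List[str]:
    out = list(seen) if seen is not None else []
    out.append(title)
    return out

print(collect_bad("A"), collect_bad("B"))      # ['A', 'B'] ['A', 'B'] -- state leaked
print(collect_good("A"), collect_good("B"))    # ['A'] ['B']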

@@ -10,7 +10,7 @@ from wikiteam3.dumpgenerator.log import logerror
from .namespaces import getNamespacesAPI
from wikiteam3.dumpgenerator.dump.page.page_titles import readTitles
from wikiteam3.dumpgenerator.dump.page.page_xml import makeXmlFromPage, makeXmlPageFromRaw
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
def getXMLRevisions(config: Config=None, session=None, allpages=False, start=None):
# FIXME: actually figure out the various strategies for each MediaWiki version
@@ -63,6 +63,8 @@ def getXMLRevisions(config: Config=None, session=None, allpages=False, start=Non
print("POST request to the API failed, retrying with GET")
config.http_method = "GET"
continue
else:
raise
except requests.exceptions.ReadTimeout as err:
# Hopefully temporary, just wait a bit and continue with the same request.
# No point putting a limit to retries, we'd need to abort everything.
@@ -99,6 +101,8 @@ def getXMLRevisions(config: Config=None, session=None, allpages=False, start=Non
print("POST request to the API failed, retrying with GET")
config.http_method = "GET"
continue
else:
raise
exportparams = {
"action": "query",
"export": "1",
@@ -140,6 +144,8 @@ def getXMLRevisions(config: Config=None, session=None, allpages=False, start=Non
exportrequest = site.api(
http_method=config.http_method, **exportparams
)
else:
raise
# This gives us a self-standing <mediawiki> element
# but we only need the inner <page>: we can live with
@@ -213,6 +219,8 @@ def getXMLRevisions(config: Config=None, session=None, allpages=False, start=Non
exportrequest = site.api(
http_method=config.http_method, **exportparams
)
else:
raise
xml = str(exportrequest["query"]["export"]["*"])
c += 1
@@ -255,9 +263,11 @@ def getXMLRevisions(config: Config=None, session=None, allpages=False, start=Non
):
print("POST request to the API failed, retrying with GET")
config.http_method = "GET"
exportrequest = site.api(
http_method=config.http_method, **exportparams
prequest = site.api(
http_method=config.http_method, **pparams
)
else:
raise
except mwclient.errors.InvalidResponse:
logerror(
config=config, to_stdout=True,
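
A note on the repeated else: raise additions in the hunks above: the surrounding except blocks only want to retry with GET in the specific "POST to the API failed" case, and re-raising in the else branch keeps every other error visible instead of silently falling through the loop. A generic sketch of the pattern with a placeholder exception class (an assumption, not the mwclient API):

class ApiError(Exception):
    """Placeholder for the real client-library exception."""

def fetch(method):
    if method == "POST":
        raise ApiError("request too long")      # simulated failure
    return "<xml/>"

http_method = "POST"
while True:
    try:
        result = fetch(http_method)
        break
    except ApiError:
        if http_method == "POST":
            # Known, recoverable case: retry the same request with GET.
            print("POST request to the API failed, retrying with GET")
            http_method = "GET"
            continue
        else:
            # Anything else is unexpected -- re-raise instead of looping silently.
            raise
print(result)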

@@ -1,6 +1,6 @@
import datetime
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
def logerror(config: Config=None,to_stdout=False , text="") -> None:
"""Log error in errors.log"""

@@ -25,6 +25,7 @@ import sys
import time
from pathlib import Path
from wikiteam3.dumpgenerator.config import Config
from wikiteam3.utils import domain2prefix
@@ -60,17 +61,16 @@ def main():
print("#" * 73)
wiki = wiki.lower()
# Make the prefix in standard way; api and index must be defined, not important which is which
prefix = domain2prefix(config={"api": wiki, "index": wiki})
prefix = domain2prefix(config=Config(api=wiki, index=wiki))
# check if compressed, in that case dump was finished previously
compressed = False
zipfilename = None
for f in os.listdir("."):
if f.endswith(".7z") and f.split("-")[0] == prefix:
compressed = True
zipfilename = f
break # stop searching, do not explore subdirectories
if compressed:
if zipfilename:
print(
"Skipping... This wiki was downloaded and compressed before in",
zipfilename,
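
Two things worth spelling out in the launcher hunk above: domain2prefix now receives a real Config instance rather than a bare dict, and the separate compressed flag is dropped because zipfilename already carries the same information. A condensed sketch of the filename scan, with a stand-in prefix and hypothetical directory contents:

import os

prefix = "examplewiki"                 # stand-in for domain2prefix(Config(api=wiki, index=wiki))
zipfilename = None
for f in os.listdir("."):              # hypothetical: scanning the dump directory
    if f.endswith(".7z") and f.split("-")[0] == prefix:
        zipfilename = f                # the filename itself doubles as the "found" flag
        break                          # stop searching, do not explore subdirectories
if zipfilename:
    print("Skipping... already compressed as", zipfilename)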

@@ -26,9 +26,8 @@ import urllib.parse
from io import BytesIO
from pathlib import Path
from wikiteam3.dumpgenerator.config import Config
from wikiteam3.utils import getUserAgent, domain2prefix
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
import requests
from internetarchive import get_item
@@ -81,7 +80,7 @@ def file_md5(path):
return digest.hexdigest()
def upload(wikis, logfile, config: Config=None, uploadeddumps=[]):
def upload(wikis, logfile, config={}, uploadeddumps=[]):
ia_keys = read_ia_keys(config)
headers = {"User-Agent": getUserAgent()}
@@ -93,7 +92,7 @@ def upload(wikis, logfile, config: Config=None, uploadeddumps=[]):
print("#" * 73)
wiki = wiki.lower()
try:
prefix = domain2prefix(config={"api": wiki})
prefix = domain2prefix(Config(api=wiki))
except KeyError:
print("ERROR: could not produce the prefix for %s" % wiki)
@@ -348,7 +347,7 @@ def upload(wikis, logfile, config: Config=None, uploadeddumps=[]):
c += 1
def main(params=None):
def main(params=[]):
parser = argparse.ArgumentParser(
"""uploader.py

@@ -1,6 +1,6 @@
import re
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
def domain2prefix(config: Config=None, session=None):
"""Convert domain name to a valid prefix filename."""

@@ -1,3 +1,4 @@
from typing import *
from hashlib import md5

@@ -1,7 +1,8 @@
import re
import sys
from typing import *
from wikiteam3.dumpgenerator.config import Config, DefaultConfig
from wikiteam3.dumpgenerator.config import Config
def avoidWikimediaProjects(config: Config=None, other: Dict=None):
"""Skip Wikimedia projects and redirect to the dumps website"""
