Refactor dumpgenerator into sub-packages

pull/475/head
Misty 1 year ago
parent 121d2efb2d
commit a33a5cd0b2

4
.gitignore vendored

@@ -8,3 +8,7 @@ __pycache__
tests/tmp
.DS_Store
desktop.ini
.venv
.vscode
.idea

@@ -19,7 +19,7 @@
# https://github.com/WikiTeam/wikiteam/wiki
from .generator import DumpGenerator
from wikiteam3.dumpgenerator.dump import DumpGenerator
def main():

@@ -0,0 +1,4 @@
from .api import checkAPI, checkRetryAPI, mwGetAPIAndIndex
from .get_json import getJSON
from .handle_status_code import handleStatusCode
from .wiki_check import getWikiEngine

@@ -6,7 +6,7 @@ import mwclient
import requests
from .get_json import getJSON
from .user_agent import getUserAgent
from wikiteam3.utils import getUserAgent
def checkAPI(api=None, session=None):

@@ -2,7 +2,7 @@ import re
import requests
from .user_agent import getUserAgent
from wikiteam3.utils import getUserAgent
def getWikiEngine(url="", session=None) -> str:

@@ -0,0 +1,3 @@
from .cli import getParameters
from .greeter import bye, welcome
from .delay import Delay

@@ -7,12 +7,12 @@ import sys
import requests
from .api import checkRetryAPI, mwGetAPIAndIndex
from .domain import domain2prefix
from .index_check import checkIndex
from .user_agent import getUserAgent
from .version import getVersion
from .wiki_check import getWikiEngine
from wikiteam3.dumpgenerator.api import checkRetryAPI, mwGetAPIAndIndex
from wikiteam3.utils import domain2prefix
from wikiteam3.dumpgenerator.api.index_check import checkIndex
from wikiteam3.utils import getUserAgent
from wikiteam3.dumpgenerator.version import getVersion
from wikiteam3.dumpgenerator.api import getWikiEngine
def getParameters(params=[]):

@@ -1,6 +1,6 @@
import datetime
from .version import getVersion
from wikiteam3.dumpgenerator.version import getVersion
def welcome():

@@ -0,0 +1 @@
from .generator import DumpGenerator

@@ -20,21 +20,21 @@ except ImportError:
)
sys.exit(1)
from .cli import getParameters
from .config import loadConfig, saveConfig
from .domain import domain2prefix
from .greeter import bye, welcome
from .image import Image
from .index_php import saveIndexPHP
from .logs import saveLogs
from .page_special_version import saveSpecialVersion
from .page_titles import getPageTitles, readTitles
from .site_info import saveSiteInfo
from .truncate import truncateFilename
from .util import undoHTMLEntities
from .wiki_avoid import avoidWikimediaProjects
from .xml_dump import generateXMLDump
from .xml_integrity import checkXMLIntegrity
from wikiteam3.dumpgenerator.config import loadConfig, saveConfig
from wikiteam3.dumpgenerator.cli import getParameters, bye, welcome
from wikiteam3.utils import domain2prefix
from wikiteam3.utils import truncateFilename
from wikiteam3.utils import undoHTMLEntities
from wikiteam3.utils import avoidWikimediaProjects
from .page.image import Image
from .misc.index_php import saveIndexPHP
from .misc.logs import saveLogs
from .misc.page_special_version import saveSpecialVersion
from .page.page_titles import getPageTitles, readTitles
from .misc.site_info import saveSiteInfo
from .xmlrev.xml_dump import generateXMLDump
from .xmlrev.xml_integrity import checkXMLIntegrity
# From https://stackoverflow.com/a/57008707
class Tee(object):
@@ -60,6 +60,7 @@ class Tee(object):
self.stdout.flush()
class DumpGenerator:
@staticmethod
def __init__(params=[]):
"""Main function"""
configfilename = "config.json"
@@ -116,6 +117,7 @@ class DumpGenerator:
saveSiteInfo(config=config, session=other["session"])
bye()
@staticmethod
def createNewDump(config={}, other={}):
images = []
print("Trying generating a new dump into a new directory...")
@@ -133,6 +135,7 @@ class DumpGenerator:
if config["logs"]:
saveLogs(config=config, session=other["session"])
@staticmethod
def resumePreviousDump(config={}, other={}):
images = []
print("Resuming previous dump process...")

@@ -1,7 +1,7 @@
import os
from .delay import Delay
from .util import removeIP
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.utils import removeIP
def saveIndexPHP(config={}, session=None):

@@ -1,4 +1,4 @@
from .delay import Delay
from wikiteam3.dumpgenerator.cli import Delay
def saveLogs(config={}, session=None):

@@ -1,7 +1,7 @@
import os
from .delay import Delay
from .util import removeIP
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.utils import removeIP
def saveSpecialVersion(config={}, session=None):

@@ -1,8 +1,8 @@
import json
import os
from .delay import Delay
from .get_json import getJSON
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.dumpgenerator.api import getJSON
def saveSiteInfo(config={}, session=None):

@@ -3,15 +3,15 @@ import re
import sys
import urllib
from .delay import Delay
from .domain import domain2prefix
from .exceptions import PageMissingError
from .get_json import getJSON
from .handle_status_code import handleStatusCode
from .log_error import logerror
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.utils import domain2prefix
from wikiteam3.dumpgenerator.exceptions import PageMissingError
from wikiteam3.dumpgenerator.api import getJSON
from wikiteam3.dumpgenerator.api import handleStatusCode
from wikiteam3.dumpgenerator.log import logerror
from .page_xml import getXMLPage
from .truncate import truncateFilename
from .util import cleanHTML, undoHTMLEntities
from wikiteam3.utils import truncateFilename
from wikiteam3.utils import cleanHTML, undoHTMLEntities
class Image:

@@ -4,10 +4,9 @@ from urllib.parse import urlparse
import mwclient
from .delay import Delay
from .domain import domain2prefix
from .namespaces import getNamespacesAPI, getNamespacesScraper
from .util import cleanHTML, undoHTMLEntities
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.dumpgenerator.dump.xmlrev.namespaces import getNamespacesAPI, getNamespacesScraper
from wikiteam3.utils import domain2prefix, cleanHTML, undoHTMLEntities
def getPageTitlesAPI(config={}, session=None):

@@ -6,10 +6,10 @@ import requests
from lxml import etree
from lxml.builder import E
from .exceptions import ExportAbortedError, PageMissingError
from .handle_status_code import handleStatusCode
from .log_error import logerror
from .uprint import uprint
from wikiteam3.dumpgenerator.exceptions import ExportAbortedError, PageMissingError
from wikiteam3.dumpgenerator.api import handleStatusCode
from wikiteam3.dumpgenerator.log import logerror
from wikiteam3.utils import uprint
def getXMLPageCore(headers={}, params={}, config={}, session=None) -> str:

@@ -1,7 +1,7 @@
import re
from .delay import Delay
from .get_json import getJSON
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.dumpgenerator.api import getJSON
def getNamespacesScraper(config={}, session=None):

@@ -1,13 +1,13 @@
import re
import sys
from .delay import Delay
from .domain import domain2prefix
from .exceptions import PageMissingError
from .log_error import logerror
from .page_titles import readTitles
from .page_xml import getXMLPage
from .util import cleanXML, undoHTMLEntities
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.utils import domain2prefix
from wikiteam3.dumpgenerator.exceptions import PageMissingError
from wikiteam3.dumpgenerator.log import logerror
from wikiteam3.dumpgenerator.dump.page.page_titles import readTitles
from wikiteam3.dumpgenerator.dump.page.page_xml import getXMLPage
from wikiteam3.utils import cleanXML, undoHTMLEntities
from .xml_header import getXMLHeader
from .xml_revisions import getXMLRevisions
from .xml_truncate import truncateXMLDump

@@ -1,15 +1,16 @@
import json
import re
import sys
from typing import *
import requests
from .exceptions import ExportAbortedError, PageMissingError
from .log_error import logerror
from .page_xml import getXMLPage
from wikiteam3.dumpgenerator.exceptions import ExportAbortedError, PageMissingError
from wikiteam3.dumpgenerator.log import logerror
from wikiteam3.dumpgenerator.dump.page.page_xml import getXMLPage
def getXMLHeader(config: dict = {}, session=None) -> tuple[str, dict]:
def getXMLHeader(config: dict = {}, session=None) -> Tuple[str, dict]:
"""Retrieve a random page to extract XML headers (namespace info, etc)"""
# get the header of a random page, to attach it in the complete XML backup
# similar to: <mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/"

@@ -5,11 +5,11 @@ from urllib.parse import urlparse
import mwclient
import requests
from .exceptions import PageMissingError
from .log_error import logerror
from wikiteam3.dumpgenerator.exceptions import PageMissingError
from wikiteam3.dumpgenerator.log import logerror
from .namespaces import getNamespacesAPI
from .page_titles import readTitles
from .page_xml import makeXmlFromPage, makeXmlPageFromRaw
from wikiteam3.dumpgenerator.dump.page.page_titles import readTitles
from wikiteam3.dumpgenerator.dump.page.page_xml import makeXmlFromPage, makeXmlPageFromRaw
def getXMLRevisions(config={}, session=None, allpages=False, start=None):

@@ -0,0 +1 @@
from .log_error import logerror

@@ -25,7 +25,7 @@ import sys
import time
from pathlib import Path
from .dumpgenerator.domain import domain2prefix
from wikiteam3.utils import domain2prefix
def main():

@@ -26,9 +26,7 @@ import urllib.parse
from io import BytesIO
from pathlib import Path
from . import dumpgenerator
from .dumpgenerator.user_agent import getUserAgent
from .dumpgenerator.domain import domain2prefix
from wikiteam3.utils import getUserAgent, domain2prefix
import requests
from internetarchive import get_item

@@ -0,0 +1,7 @@
from .uprint import uprint
from .util import removeIP, cleanXML, cleanHTML, undoHTMLEntities
from .user_agent import getUserAgent
from .domain import domain2prefix
from .truncate import truncateFilename
from .wiki_avoid import avoidWikimediaProjects
Loading…
Cancel
Save