Fix file write (use UTF-8 instead of bytes)

Signed-off-by: Elsie Hupp <github@elsiehupp.com>
pull/446/head
Elsie Hupp 2 years ago
parent d14899f134
commit 64cdd7b0cb
No known key found for this signature in database
GPG Key ID: 4026ACBE8D448CDC

Binary file not shown.

Binary file not shown.

@ -7,11 +7,16 @@ def welcome():
message = ""
"""Opening message"""
message += "#" * 73
message += """
# Welcome to DumpGenerator %s by WikiTeam (GPL v3) #
# More info at: https://github.com/WikiTeam/wikiteam #""" % (
message += "\n"
welcome_string = "# Welcome to DumpGenerator %s by WikiTeam (GPL v3)" % (
getVersion()
)
welcome_string += " " * (73 - len(welcome_string) - 1) + "#"
message += welcome_string
message += "\n"
message += (
"# More info at: https://github.com/elsiehupp/wikiteam3 #"
)
message += "\n"
message += "#" * 73
message += "\n"

@ -15,5 +15,5 @@ def saveIndexPHP(config={}, session=None):
raw = r.text
delay(config=config, session=session)
raw = removeIP(raw=raw)
with open("%s/index.html" % (config["path"]), "wb") as outfile:
outfile.write(bytes(raw, "utf-8"))
with open("%s/index.html" % (config["path"]), "w", encoding="utf-8") as outfile:
outfile.write(str(raw))

@ -17,5 +17,7 @@ def saveSpecialVersion(config={}, session=None):
raw = r.text
delay(config=config, session=session)
raw = removeIP(raw=raw)
with open("%s/Special:Version.html" % (config["path"]), "wb") as outfile:
outfile.write(bytes(raw, "utf-8"))
with open(
"%s/Special:Version.html" % (config["path"]), "w", encoding="utf-8"
) as outfile:
outfile.write(str(raw))

@ -64,8 +64,12 @@ def removeIP(raw=""):
def cleanXML(xml=""):
"""Trim redundant info from the XML however it comes"""
# do not touch XML codification, leave AS IS
# EDIT 2022: we are making this explicitly Unicode
# for Windows compatibility.
# If the encoding has to stay as is, we'll have
# to change all the file encodings, as well.
if re.search(r"</siteinfo>\n", xml):
xml = xml.split("</siteinfo>\n")[1]
xml = xml.split("</siteinfo>\n")[1].encode("utf-8")
if re.search(r"</mediawiki>", xml):
xml = xml.split("</mediawiki>")[0]
xml = xml.split("</mediawiki>")[0].encode("utf-8")
return xml

@ -35,8 +35,10 @@ def generateXMLDump(config={}, titles=[], start=None, session=None):
)
else:
print("Retrieving the XML for every page from the beginning")
xmlfile = open("{}/{}".format(config["path"], xmlfilename), "wb")
xmlfile.write(header)
xmlfile = open(
"{}/{}".format(config["path"], xmlfilename), "w", encoding="utf-8"
)
xmlfile.write(str(header))
try:
r_timestamp = "<timestamp>([^<]+)</timestamp>"
for xml in getXMLRevisions(config=config, session=session, start=start):
@ -50,6 +52,9 @@ def generateXMLDump(config={}, titles=[], start=None, session=None):
print(e)
print("This API library version is not working")
sys.exit()
except UnicodeEncodeError as e:
print(e)
else:
print(
'Retrieving the XML for every page from "%s"' % (start and start or "start")
@ -65,7 +70,7 @@ def generateXMLDump(config={}, titles=[], start=None, session=None):
xmlfile = open(
"{}/{}".format(config["path"], xmlfilename), "w", encoding="utf-8"
)
xmlfile.write(header)
xmlfile.write(str(header))
xmlfile.close()
xmlfile = open(
@ -98,6 +103,6 @@ def generateXMLDump(config={}, titles=[], start=None, session=None):
# (logged in errors log)
c += 1
xmlfile.write(footer)
xmlfile.write(str(footer))
xmlfile.close()
print("XML dump saved at...", xmlfilename)

Loading…
Cancel
Save