mirror of
https://git.meli.delivery/meli/meli
synced 2024-11-15 06:12:47 +00:00
378 lines
11 KiB
Python
378 lines
11 KiB
Python
|
# meli - scripts/make_html_manual_page.py
|
||
|
#
|
||
|
# Copyright 2023 Manos Pitsidianakis
|
||
|
#
|
||
|
# This file is part of meli.
|
||
|
#
|
||
|
# meli is free software: you can redistribute it and/or modify
|
||
|
# it under the terms of the GNU General Public License as published by
|
||
|
# the Free Software Foundation, either version 3 of the License, or
|
||
|
# (at your option) any later version.
|
||
|
#
|
||
|
# meli is distributed in the hope that it will be useful,
|
||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
# GNU General Public License for more details.
|
||
|
#
|
||
|
# You should have received a copy of the GNU General Public License
|
||
|
# along with meli. If not, see <http://www.gnu.org/licenses/>.
|
||
|
|
||
|
import argparse
import functools
import http.client
import re
import shlex
import shutil
import signal
import subprocess
import sys
import textwrap
from html.parser import HTMLParser
from http import HTTPMethod, HTTPStatus
from pathlib import Path
from subprocess import PIPE
from threading import Timer
from urllib.parse import urljoin, urlparse

from bs4 import BeautifulSoup
|
||
|
|
||
|
NO_TTY = False
|
||
|
|
||
|
|
||
|
def clear_line(signum, frame):
    """Blank the current terminal line and forget the progress-bar width.

    The ``(signum, frame)`` parameters match the signature of a signal
    handler; neither is used.  Nothing is written when ``NO_TTY`` is set.
    """
    if not NO_TTY:
        width = shutil.get_terminal_size().columns
        # ESC[0G moves the cursor to the start of the line.
        sys.stdout.write(f"{chr(27)}[0G")
        # Overwrite the whole line with spaces, then return the cursor.
        sys.stdout.write(" " * width + "\r")
        sys.stdout.flush()
        # The next draw_progress() call starts measuring padding afresh.
        draw_progress.max_cols = 0
|
||
|
|
||
|
|
||
|
# Clear the progress line whenever the terminal is resized.  SIGWINCH is not
# defined on every platform (e.g. Windows), so only register it when present.
if hasattr(signal, "SIGWINCH"):
    signal.signal(signal.SIGWINCH, clear_line)
|
||
|
|
||
|
# URL templates of online manual page mirrors, tried in order.
# "%N" is replaced with the page name and "%S" with its section.
# The availability check always talks HTTPS to the host, so the templates use
# the https scheme to make the emitted link match the URL that was verified.
TEMPLATES = [
    "https://linux.die.net/man/%S/%N",
    "https://man7.org/linux/man-pages/man%S/%N.%S.html",
    "https://manpages.debian.org/stable/%N.%S.en.html",
    "https://man.archlinux.org/man/%N.%S",
    "https://man.voidlinux.org/%N.%S",
    "https://man.bsd.lv/%N.%S",
    "https://man.bsd.lv/OpenBSD-7.0/%N.%S",
    "https://man.bsd.lv/FreeBSD-13.0/%N.%S",
    "https://man.bsd.lv/POSIX-2013/%N.%S",  # last resorts
    "https://man.bsd.lv/UNIX-7/%N.%S",
]
|
||
|
|
||
|
|
||
|
def add_progress(count=1):
    """Advance the shared progress counter by ``count`` (one unit by default)."""
    add_progress.count = add_progress.count + count


# Running total of completed work units, stored on the function object.
add_progress.count = 0.0
|
||
|
|
||
|
|
||
|
def draw_progress(total, count=None, status=""):
    """Redraw the single-line text progress bar on standard output.

    Args:
        total: Number of work units that make up 100%.  A total of zero is
            drawn as a completed bar.
        count: Units completed so far; defaults to ``add_progress.count``.
        status: Short message printed after the percentage.  It is shortened
            to the space left on the line, or dropped when there is none.

    Nothing is written when ``NO_TTY`` is set.
    """
    if NO_TTY:
        return
    columns = shutil.get_terminal_size().columns
    bar_len = draw_progress.bar_len

    # textwrap.shorten() raises ValueError when the width cannot even hold its
    # "[...]" placeholder, which happens on terminals of 80 columns or fewer
    # (80 is also the fallback size when stdout is not a terminal).
    status_width = columns - bar_len - len("100.0% ...") - 8
    if status_width >= len("[...]"):
        status = textwrap.shorten(status, width=status_width)
    else:
        status = ""

    if count is None:
        count = add_progress.count

    # Clamp to [0, 1] so rounding drift or an empty job cannot overflow the bar.
    fraction = count / float(total) if total else 1.0
    fraction = min(max(fraction, 0.0), 1.0)
    filled_len = int(round(bar_len * fraction))
    percents = round(100.0 * fraction, 1)
    bar = "=" * filled_len + "-" * (bar_len - filled_len)

    message = f"[{bar}] {percents}% ...{status}"
    # Pad up to the longest line drawn so far so leftovers of a previous,
    # longer status are overwritten.
    draw_progress.max_cols = max(len(message) + 1, draw_progress.max_cols)
    spaces = " " * (draw_progress.max_cols - len(message))
    message += f"{spaces}\r"
    sys.stdout.write(message)
    sys.stdout.flush()


# Width of the longest line drawn so far; reset to 0 to stop padding.
draw_progress.max_cols = 0
# Number of characters between the square brackets of the bar.
draw_progress.bar_len = 62
|
||
|
|
||
|
|
||
|
class ManFixer(HTMLParser):
    """Collect the text content of HTML with whitespace runs collapsed.

    Text nodes are appended to ``output`` with every run of two or more
    whitespace characters replaced by a single space and U+FEFF removed.
    When ``extract_href`` is true, the ``href`` value of each ``<a>`` tag is
    appended as well, followed by a space.
    """

    # Pattern for the whitespace runs that are squeezed to one space.
    whitespace = r"\s{2,}"
    # Text accumulated so far.
    output = ""
    # Whether ``href`` values of ``<a>`` tags are added to the output.
    extract_href = False

    def reset(self):
        """Discard accumulated output along with the parser state."""
        self.output = ""
        super().reset()

    def _clean(self, text):
        """Collapse whitespace runs in ``text`` and drop U+FEFF characters."""
        return re.sub(self.whitespace, " ", text).replace("\ufeff", "")

    def handle_starttag(self, tag, attrs):
        """Append the link target of ``<a>`` tags when ``extract_href`` is set."""
        if tag != "a" or not self.extract_href:
            return
        href = dict(attrs).get("href")
        # A valueless attribute (``<a href>``) is reported as None and has no
        # target to extract.
        if href is None:
            return
        self.output += self._clean(href)
        self.output += " "

    def handle_endtag(self, tag):
        """End tags contribute nothing to the output."""

    def handle_data(self, data):
        """Append a text node to the output."""
        self.output += self._clean(data)

    @staticmethod
    def extract(input_):
        """Return the cleaned text of the HTML string ``input_``."""
        parser = ManFixer()
        parser.feed(input_)
        # Flush text the parser is still buffering, e.g. input that ends in
        # something that looks like an unfinished character reference.
        parser.close()
        return parser.output
|
||
|
|
||
|
|
||
|
@functools.cache
def give_me_head(url, url_, name, section):
    """Check with HTTP HEAD requests whether ``url_`` exists, following redirects.

    Args:
        url: The URL template ``url_`` was built from.  Only part of the
            cache key.
        url_: The concrete URL to probe.  The request is always made over
            HTTPS to the URL's host, whatever scheme the URL names.
        name: Manual page name.  Only part of the cache key.
        section: Manual page section.  Only part of the cache key.

    Returns:
        The URL that finally answered ``200 OK`` (``url_`` itself, or the
        target of the last redirect), or ``None`` when the page does not
        exist, more than three redirects were needed, or a redirect carried
        no ``Location`` header.

    Raises:
        OSError, http.client.HTTPException: On connection failures; such
        outcomes are not cached.
    """
    max_redirects = 3
    redirect_statuses = (
        HTTPStatus.FOUND,
        HTTPStatus.TEMPORARY_REDIRECT,
        HTTPStatus.PERMANENT_REDIRECT,
        HTTPStatus.MOVED_PERMANENTLY,
    )
    target = url_
    give_me_head.redirects = 0
    try:
        while True:
            parts = urlparse(target)
            request_path = parts.path or "/"
            if parts.query:
                request_path = f"{request_path}?{parts.query}"
            conn = http.client.HTTPSConnection(parts.hostname, timeout=6)
            try:
                conn.request(HTTPMethod.HEAD, request_path)
                response = conn.getresponse()
                status = response.status
                location = response.getheader("Location")
            finally:
                # Always release the socket, also when the request fails.
                conn.close()

            if status not in redirect_statuses:
                return target if status == HTTPStatus.OK else None

            give_me_head.redirects += 1
            if give_me_head.redirects > max_redirects:
                return None
            if not location:
                print("bailout")
                return None
            # Location may be relative to the URL that was just requested.
            target = urljoin(target, location)
    finally:
        # Never leak the redirect count of one lookup into the next one.
        give_me_head.redirects = 0


# Number of redirects followed by the lookup currently in progress.
give_me_head.redirects = 0
|
||
|
|
||
|
|
||
|
def man_to_path(man: str) -> str:
    """Turn a page reference such as ``"meli.1"`` into ``"1/meli"``.

    The trailing run of digits is taken as the section and the single
    character in front of it (the separator) is dropped from the name.
    Strings that do not end in a digit are returned unchanged.
    """
    # The name part is non-greedy so that a multi-digit section is captured
    # whole instead of only its last digit.
    result = re.match(r"(.+?)(\d+)$", man)
    if not result:
        return man
    return f"{result[2]}/{result[1][:-1]}"
|
||
|
|
||
|
|
||
|
def draw_spinner():
    """Draw the next spinner frame at the right edge and schedule the next one.

    Each call writes one frame to standard output and starts a new ``Timer``
    (kept in ``draw_spinner.timer``) that calls this function again after
    ``draw_spinner.interval`` seconds; cancel that timer to stop the
    animation.  Nothing is drawn or scheduled when ``NO_TTY`` is set.
    """
    if NO_TTY:
        return
    # Re-read the width on every frame so the spinner follows terminal resizes.
    draw_spinner.columns = shutil.get_terminal_size().columns
    frame = draw_spinner.frames[draw_spinner.index]
    # ESC[<n>C moves the cursor n columns to the right; "\r" brings it back.
    sys.stdout.write(f"{chr(27)}[{draw_spinner.columns - 1}C{frame}\r")
    sys.stdout.flush()
    draw_spinner.index = (draw_spinner.index + 1) % len(draw_spinner.frames)
    draw_spinner.timer = Timer(draw_spinner.interval, draw_spinner)
    # Daemon threads cannot keep the process alive, so the program still exits
    # if the main thread dies or cancel() races with a frame being drawn.
    draw_spinner.timer.daemon = True
    draw_spinner.timer.start()


# Seconds between two frames.
draw_spinner.interval = 0.1
# Terminal width used to position the spinner.
draw_spinner.columns = shutil.get_terminal_size().columns
# Animation frames, shown in order.
draw_spinner.frames = ["|", "/", "-", "\\"]
# Index of the frame to draw next.
draw_spinner.index = 0
|
||
|
|
||
|
# Inline style sheet written in front of the generated HTML.
_STYLE_SHEET = """
<style>
  code.Ic, code.Li, code.Cm, code.Nm, kbd.manpage-kbd{
    display: inline-block;
  }
  kbd {
    background-color: #eee;
    border-radius: 3px;
    border: 1px solid #b4b4b4;
    box-shadow:
      0 1px 1px rgba(0, 0, 0, 0.2),
      0 2px 0 0 rgba(255, 255, 255, 0.7) inset;
    color: #333;
    display: inline-block;
    font-size: 0.85em;
    font-weight: 700;
    line-height: 1;
    padding: 2px 4px;
    white-space: nowrap;
  }
  code {
    background-color: #eee;
    border-radius: 3px;
    font-family: courier, monospace;
    padding: 0 3px;
  }
</style>
"""


def _parse_bool(value):
    """Interpret the ``--refs`` argument; ``bool("False")`` would be true."""
    return value.strip().lower() not in ("", "0", "false", "no", "off", "n", "f")


def _parse_args(argv=None):
    """Parse the command line and fill in the derived defaults."""
    parser = argparse.ArgumentParser(
        description="Generates a <table> element from a mdoc manpage."
    )
    parser.add_argument("page", type=str, help="mdoc file")
    parser.add_argument(
        "--output",
        "-o",
        type=str,
        help="name of output file",
        required=False,
        default=None,
    )
    parser.add_argument(
        "--name",
        "-n",
        type=str,
        help="name used for html IDs. defaults to the file name.",
        required=False,
        default=None,
    )
    parser.add_argument(
        "--refs",
        type=_parse_bool,
        help="find external manpages and hyperlink to them",
        required=False,
        default=False,
    )
    parser.add_argument(
        "--no-tty",
        help="don't draw progress animation",
        required=False,
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--exclude-refs",
        type=str,
        help="comma separated list of manpages to not hyperlink",
        required=False,
        default="",
    )
    parser.add_argument(
        "--mandoc",
        type=str,
        help="alternative mandoc binary path",
        required=False,
        default="mandoc",
    )

    args = parser.parse_args(argv)
    # Always a list, so membership tests never degrade to substring checks.
    args.exclude_refs = [
        entry.strip() for entry in args.exclude_refs.split(",") if entry.strip()
    ]
    if not args.output:
        args.output = Path.cwd() / (Path(args.page).name + ".html")
    if not args.name:
        args.name = Path(args.page).name
    return args


def _render_manpage(args):
    """Run mandoc on the page and return its post-processed HTML fragment."""
    options = "fragment,toc,includes=#%I"
    if args.refs:
        # Cross references become "<name>\t<section>" hrefs, resolved later.
        options += ",man=%N\t%S"
    # An argument list instead of a shell string: the page path cannot be
    # misread as shell syntax and a failing mandoc is reported by check=True.
    command = [
        *shlex.split(args.mandoc),
        "-I",
        "os=rendered by mandoc",
        "-Kutf-8",
        f"-O{options}",
        "-Thtml",
        str(args.page),
    ]
    html_output = subprocess.run(command, stdout=PIPE, check=True).stdout.decode(
        "utf-8"
    )
    # Drop whitespace in front of the first "</pre" on each line (formerly
    # done by piping the output through sed).
    html_output = "\n".join(
        re.sub(r"\s*</pre", "</pre", line, count=1)
        for line in html_output.split("\n")
    )
    # Wrap text in mathematical angle brackets in <kbd>.  The brackets are
    # matched both literally and as hexadecimal character references, and the
    # match is non-greedy so several keys on one line stay separate.
    return re.sub(
        r"(?:⟨|&#x27E8;)(.+?)(?:⟩|&#x27E9;)",
        '<kbd class="manpage-kbd">\\1</kbd>',
        html_output,
    )


def _fix_internal_link(link, prefix, targets):
    """Prefix same-page anchors; return False if the link needs a lookup."""
    href = link.get("href")
    if href is None:
        return True
    if not href.startswith("#"):
        return False
    if href[1:] in targets:
        link["href"] = "#" + prefix + "_" + href[1:]
    return True


def _find_online_mirror(link, name, section, total):
    """Point ``link`` at the first online mirror that serves the page."""
    step = 1.0 / len(TEMPLATES) if TEMPLATES else 0.0
    for tried, url in enumerate(TEMPLATES, start=1):
        add_progress(step)
        draw_progress(
            total,
            status=f"{name}.{section}: searching for an online mirror",
        )
        url_ = url.replace("%N", name).replace("%S", section)
        try:
            got = give_me_head(url, url_, name, section)
        except Exception as exc:
            # Best effort: an unreachable mirror must not abort the run.
            if "handshake operation timed out" not in str(exc):
                print(f"got {exc} for url {url_}")
            continue
        if got:
            link["href"] = got
            # Account for the mirrors that no longer need to be tried.
            add_progress(step * (len(TEMPLATES) - tried))
            return


def _rewrite_link(link, args, targets, total):
    """Rewrite one hyperlink and advance the progress bar by one unit."""
    result = None
    if not _fix_internal_link(link, args.name, targets):
        result = re.match(r"(.+)\t(.+)$", link.get("href"))
    if result is None:
        add_progress()
        draw_progress(total)
        return
    name = result[1]
    section = result[2]
    link["href"] = f"./{name}.{section}.html"
    if name in args.exclude_refs or f"{name}.{section}" in args.exclude_refs:
        add_progress()
        draw_progress(
            total,
            status=f"{name}.{section}: Excluding ref because it is in --exclude-refs list. Leaving it as {link['href']}",
        )
        return
    _find_online_mirror(link, name, section, total)


def main():
    """Render an mdoc page to an HTML fragment with namespaced ids and links."""
    global NO_TTY

    args = _parse_args()
    NO_TTY = args.no_tty

    # Fail early if the page is missing or is not valid UTF-8.
    with open(args.page, "r", encoding="utf-8") as manpage:
        manpage.read()

    soup = BeautifulSoup(_render_manpage(args), "html.parser")
    targets = {
        target.get("id") for target in soup.find_all(lambda tag: tag.has_attr("id"))
    }
    # First top-level element, skipping any leading text node.
    root_table = soup.find(True, recursive=False)
    if root_table is None:
        sys.exit(f"mandoc produced no HTML elements for {args.page}")
    root_table["id"] = args.name

    links = soup.find_all("a")
    if args.refs:
        total = len(links)
        print(f"Replacing `href` attributes in {total} hyperlinks...")
        draw_spinner.timer = Timer(draw_spinner.interval, draw_spinner)
        draw_spinner.timer.daemon = True
        draw_spinner.timer.start()
        try:
            for link in links:
                _rewrite_link(link, args, targets, total)
        finally:
            # Stop the animation even when rewriting fails.
            draw_spinner.timer.cancel()
            clear_line(None, None)
    else:
        # The ids are prefixed below in every mode, so same-page anchors have
        # to follow them even when external references are left alone.
        for link in links:
            _fix_internal_link(link, args.name, targets)

    for target in soup.find_all(lambda tag: tag.has_attr("id")):
        id_ = target.get("id")
        if id_ in targets:
            target["id"] = args.name + "_" + id_

    with open(args.output, "w", encoding="utf-8") as f:
        f.write(_STYLE_SHEET)
        f.write(soup.prettify())
    print("Written to ", args.output)


if __name__ == "__main__":
    main()
|