# meli - scripts/make_html_manual_page.py # # Copyright 2023 Manos Pitsidianakis # # This file is part of meli. # # meli is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # meli is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with meli. If not, see . import subprocess from threading import Timer from html.parser import HTMLParser import argparse import sys import re import shutil import textwrap from subprocess import PIPE from urllib.parse import urlparse from pathlib import Path import http.client from http import HTTPMethod, HTTPStatus import signal import functools from bs4 import BeautifulSoup NO_TTY = False def clear_line(signum, frame): if NO_TTY: return columns = shutil.get_terminal_size().columns message = chr(27) + "[0G" # go to start of line sys.stdout.write(message) message = " " * columns + "\r" sys.stdout.write(message) sys.stdout.flush() draw_progress.max_cols = 0 signal.signal(signal.SIGWINCH, clear_line) MIRRORS = [ "http://linux.die.net/man/%S/%N", "http://man7.org/linux/man-pages/man%S/%N.%S.html", "http://manpages.debian.org/stable/%N.%S.en.html", "http://man.bsd.lv/%N.%S", "http://man.archlinux.org/man/%N.%S", "http://man.voidlinux.org/%N.%S", "http://man.bsd.lv/OpenBSD-7.0/%N.%S", "http://man.bsd.lv/FreeBSD-13.0/%N.%S", "http://man.bsd.lv/POSIX-2013/%N.%S", # last resorts "http://man.bsd.lv/UNIX-7/%N.%S", "https://www.unix.com/man-page/mojave/%S/%N/", ] def add_progress(count=1): add_progress.count += count add_progress.count = 0.0 def draw_progress(total, count=None, status=""): if NO_TTY: return columns = shutil.get_terminal_size().columns status = textwrap.shorten( status, width=columns - draw_progress.bar_len - len("100.0% ...") - 8 ) if count is None: count = add_progress.count bar_len = draw_progress.bar_len filled_len = int(round(bar_len * count / float(total))) percents = round(100.0 * count / float(total), 1) bar = "=" * filled_len + "-" * (bar_len - filled_len) message = f"[{bar}] {percents}% ...{status}" draw_progress.max_cols = max(len(message) + 1, draw_progress.max_cols) spaces = " " * (draw_progress.max_cols - len(message)) message += f"{spaces}\r" sys.stdout.write(message) sys.stdout.flush() draw_progress.max_cols = 0 draw_progress.bar_len = 62 class ManFixer(HTMLParser): whitespace = r"\s{2,}" output = "" extract_href = False def reset(self): self.output = "" super().reset() def handle_starttag(self, tag, attrs): attrs = {a[0]: a[1] for a in attrs} if tag == "a" and self.extract_href and "href" in attrs: self.output += re.sub(self.whitespace, " ", attrs["href"]).replace( "\ufeff", "" ) self.output += " " def handle_endtag(self, tag): pass def handle_data(self, data): self.output += re.sub(self.whitespace, " ", data).replace("\ufeff", "") @staticmethod def extract(input_): parser = ManFixer() parser.feed(input_) return parser.output @functools.cache def give_me_head(url_): o = urlparse(url_) conn = http.client.HTTPSConnection(o.hostname, timeout=6) conn.request(HTTPMethod.HEAD, o.path) response = conn.getresponse() if response.status in ( HTTPStatus.FOUND, HTTPStatus.TEMPORARY_REDIRECT, HTTPStatus.PERMANENT_REDIRECT, HTTPStatus.MOVED_PERMANENTLY, ): # print("for ", url_, "following redirect", response.status) give_me_head.redirects += 1 if give_me_head.redirects > 3: return None if response.getheader("Location"): # print("for ", url_, "following redirect to ", response.getheader("Location")) return give_me_head(response.getheader("Location")) print("bailout") return None # print("for ", url_, "code is ", response.status) give_me_head.redirects = 0 if response.status == http.HTTPStatus.OK: return url_ return None give_me_head.redirects = 0 def man_to_path(man: str) -> str: exp = r"(.+)(\d{1,})$" result = re.match(exp, man) if not result: return man return f"{result[2]}/{result[1][:-1]}" def draw_spinner(): if NO_TTY: return message = ( chr(27) + "[" + str(draw_spinner.columns - 1) + "C" + draw_spinner.frames[draw_spinner.index] + "\r" ) sys.stdout.write(message) sys.stdout.flush() draw_spinner.index += 1 draw_spinner.index = draw_spinner.index % len(draw_spinner.frames) draw_spinner.timer = Timer(draw_spinner.interval, draw_spinner) draw_spinner.timer.start() draw_spinner.interval = 0.1 draw_spinner.columns = shutil.get_terminal_size().columns draw_spinner.frames = ["|", "/", "-", "\\"] draw_spinner.index = 0 if __name__ == "__main__": draw_spinner.timer = Timer(draw_spinner.interval, draw_spinner) parser = argparse.ArgumentParser( description="Generates a element from a mdoc manpage." ) parser.add_argument("page", type=str, help="mdoc file") parser.add_argument( "--output", "-o", type=str, help="name of output file", required=False, default=None, ) parser.add_argument( "--name", "-n", type=str, help="name used for html IDs. defaults to file name stem.", required=False, default=None, ) parser.add_argument( "--refs", type=bool, help="find external manpages and hyperlink to them", required=False, default=True, ) parser.add_argument( "--no-tty", help="don't draw progress animation", required=False, default=False, action="store_true", ) parser.add_argument( "--include-refs", type=str, help="comma separated list of manpages to relatively hyperlink", required=False, default="", ) parser.add_argument( "--exclude-refs", type=str, help="comma separated list of manpages to not hyperlink", required=False, default="", ) parser.add_argument( "--mandoc", type=str, help="alternative mandoc binary path", required=False, default="mandoc", ) parser.add_argument( "--no-css", type=bool, help="don't prepend """ ) f.write(soup.prettify()) print("Written to ", args.output)