Blocks working

2024-11-12 07:10:26 +00:00 · 2022-04-05 10:40:49 -04:00 · 2022-04-05 10:40:49 -04:00 · 98ac4f744a
commit 98ac4f744a
parent dcbcc5d34f
4 changed files with 691 additions and 0 deletions
--- a/z/esc.go
+++ b/z/esc.go
@ -0,0 +1,26 @@
+package Z
+
+import "github.com/rwxrob/fn"
+
+// EscThese is set to the default UNIX shell characters which require
+// escaping to be used safely on the terminal. It can be changed to suit
+// the needs of different host shell environments. The default covers
+// space, carriage return, tab, line feed, and | & ; ( ) < > ! [ ] only;
+// quotes, backslash, dollar sign, backtick, and glob characters are not
+// part of the default set.
+var EscThese = " \r\t\n|&;()<>![]"
+
+// Esc returns a copy of s in which every rune that appears in EscThese
+// is preceded by exactly one backslash. All other runes are copied
+// unchanged. The result is only as shell-safe as EscThese is complete
+// for the target shell: runes that are not listed there (quotes and
+// backslash itself, with the default value) are passed through as is.
+func Esc(s string) string {
+	// len(s) is a lower bound in the common case of nothing to escape.
+	buf := make([]rune, 0, len(s))
+	for _, r := range s {
+		for _, esc := range EscThese {
+			if r == esc {
+				buf = append(buf, '\\')
+				// Stop at the first match so a character that is listed
+				// more than once in EscThese is still escaped only once.
+				break
+			}
+		}
+		buf = append(buf, r)
+	}
+	return string(buf)
+}
+
+// EscAll hands args to fn.Map with Esc as the mapping function and
+// returns whatever fn.Map produces.
+func EscAll(args []string) []string {
+	escaped := fn.Map(args, Esc)
+	return escaped
+}
--- a/z/mark.go
+++ b/z/mark.go
@ -0,0 +1,417 @@
+package Z
+
+import (
+	"log"
+	"regexp"
+	"unicode"
+
+	"github.com/rwxrob/scan"
+	"github.com/rwxrob/term"
+	"github.com/rwxrob/to"
+)
+
+// This file contains BonzaiMark.
+
+// IndentBy is the number of spaces to indent in Indent. Default is 7.
+// A Bonzai command tree creator can change this in this one place for
+// every composite command imported into their application.
+var IndentBy = 7
+
+// Columns is the number of bytes (not runes) at which Wrap will wrap.
+// Default is 80. A Bonzai command tree creator can change this in this
+// one place for every composite command imported into their application.
+var Columns = 80
+
+// Emph renders BonzaiMark emphasis spans specifically for
+// VT100-compatible terminals (which almost all are today):
+//
+//     *Italic*
+//     **Bold**
+//     ***BoldItalic***
+//     <under> (keeping brackets)
+//
+// A run of asterisks opens a span when it follows white space (or the
+// start of the buffer) and closes one otherwise. An opening run is
+// replaced by term.Italic, term.Bold, or term.BoldItalic according to
+// its length; any closing run is replaced by term.Reset. An opening run
+// that is followed by white space, that is longer than three asterisks,
+// or that ends the buffer is not a valid opener and is kept literally.
+//
+// Text between < and the next > is wrapped in term.Under and term.Reset
+// with the brackets kept. A < that has no closing > is ordinary text.
+//
+// The buffer is processed rune by rune, so multi-byte UTF-8 text is
+// safe anywhere in the input.
+//
+// See Mark for block formatting and rwxrob/term for terminal rendering.
+func Emph(buf string) string {
+	// Convert once up front: indexing the string itself yields bytes,
+	// which does not line up with a rune index.
+	runes := []rune(buf)
+	nbuf := make([]rune, 0, len(runes))
+
+	var opentok, closetok bool
+	var otok string
+	prev := ' ' // start of buffer counts as white space
+
+	for i := 0; i < len(runes); i++ {
+		r := runes[i]
+
+		if r == '<' {
+			// look ahead for the matching bracket before committing
+			end := -1
+			for j := i + 1; j < len(runes); j++ {
+				if runes[j] == '>' {
+					end = j
+					break
+				}
+			}
+			if end >= 0 {
+				nbuf = append(nbuf, '<')
+				nbuf = append(nbuf, []rune(term.Under)...)
+				nbuf = append(nbuf, runes[i+1:end]...)
+				nbuf = append(nbuf, []rune(term.Reset)...)
+				nbuf = append(nbuf, '>')
+				i = end // the loop increment steps past '>'
+				continue
+			}
+			// no closing bracket: fall through and keep '<' as plain text
+		}
+
+		if r != '*' {
+
+			if opentok {
+				// keep the asterisks unless they form a valid opener
+				tokval := otok
+				if !unicode.IsSpace(r) {
+					switch otok {
+					case "*":
+						tokval = term.Italic
+					case "**":
+						tokval = term.Bold
+					case "***":
+						tokval = term.BoldItalic
+					}
+				}
+				nbuf = append(nbuf, []rune(tokval)...)
+				opentok = false
+				otok = ""
+			}
+
+			if closetok {
+				nbuf = append(nbuf, []rune(term.Reset)...) // practical, not perfect
+				closetok = false
+			}
+
+			prev = r
+			nbuf = append(nbuf, r)
+			continue
+		}
+
+		// r is '*': it extends or starts an opener when it follows white
+		// space, otherwise it belongs to a closer
+		if unicode.IsSpace(prev) || opentok {
+			opentok = true
+			otok += "*"
+			continue
+		}
+		closetok = true
+	}
+
+	// an opener with nothing after it is just literal asterisks
+	if opentok {
+		nbuf = append(nbuf, []rune(otok)...)
+	}
+
+	// for tokens at the end of a block
+	if closetok {
+		nbuf = append(nbuf, []rune(term.Reset)...)
+	}
+
+	return string(nbuf)
+}
+
+// Wrap passes in and the package-level Columns width to to.Wrapped and
+// returns the resulting string; the second value returned by to.Wrapped
+// is discarded.
+func Wrap(in string) string {
+	wrapped, _ := to.Wrapped(in, Columns)
+	return wrapped
+}
+
+// Indent passes in and the package-level IndentBy count to to.Indented
+// and returns the result.
+func Indent(in string) string {
+	indented := to.Indented(in, IndentBy)
+	return indented
+}
+
+// InWrap wraps and then indents: the text is first wrapped to the
+// width left over once IndentBy is subtracted from Columns, and the
+// wrapped text is then indented by IndentBy.
+func InWrap(in string) string {
+	width := Columns - IndentBy
+	wrapped, _ := to.Wrapped(in, width)
+	return to.Indented(wrapped, IndentBy)
+}
+
+// ---------------------------- finish this ---------------------------
+
+// TODO finish the full Mark implementation and documentation
+
+/*
+Mark renders BonzaiMark markup as formatted VT100-compatible terminal
+output, wrapped and indented as specified by the package variables
+IndentBy and Columns.
+
+BonzaiMark
+
+BonzaiMark is a minimal subset of CommonMark (Markdown) suitable for
+rendering to terminals, web pages, PDF, or plain text. The syntax is
+deliberately simple and easy to read (much like standard GoDoc) but rich
+enough to look well rendered in documents as well as the terminal.
+
+Stripped Indentation
+
+Unlike CommonMark, BonzaiMark ignores any number of blank lines or
+whitespace before the first line and uses the initial characters
+preceding the first line (tabs or spaces) as a basis for what to strip
+from every subsequent line of the document. All trailing white space and
+blank lines are also discarded. This allows BonzaiMark to be included in
+source code in very readable ways (preferably with backtick string
+literals).
+
+    some := `
+		    Here is a paragraph
+				that will have the initial spaces
+				stripped.
+		`
+
+Structure: Blocks of Spans
+
+Like CommonMark, every BonzaiMark document consists of one or more
+blocks which contain one or more spans of the following type:
+
+    plain
+    *italic*
+    **bold**
+    ***bolditalic***
+    <under> (brackets remain)
+
+Unlike CommonMark, spans may not contain any other span type.
+
+While the underline format is not supported in CommonMark, underlining is frequently used in place of italic for most terminals. Angle brackets are, however, supported in CommonMark.
+
+The following limited block types are supported. (All others from CommonMark are not):
+
+    * Paragraph
+		* Verbatim
+		* Numbered List
+		* Bulleted List
+		* Numbered Long List Item
+		* Bulleted Long List Item
+
+Unlike CommonMark, all blocks must be separated by two or more line returns.
+
+Paragraph Blocks
+
+Paragraph blocks are the most common. They consist of one or more spans.
+
+Verbatim Blocks
+
+A Verbatim block will be included exactly as typed. It begins with the
+first line that has four or more spaces followed by a non-whitespace
+character. The block continues until the next block is detected. (All
+other blocks must begin on the first column, after stripped
+indentation.)
+
+Lists
+
+Like CommonMark there are long lists and compact lists. Long lists are
+actually multiple consecutive long list item blocks whereas compact
+lists consist of list items that are on one line after another (no
+double-return block separator).
+
+Lists are either numbered or bulleted and never exceed one level (no
+nesting). Both list types may contain any number of paragraph blocks but
+most will only contain the one. When multiple paragraph blocks are
+wanted the subsequent paragraph block must begin on the same column as
+the first character of the first line of the first list item paragraph;
+they must line up.
+
+    1. **Keep list item paragraphs lined up**
+
+		   This is a second paragraph block under the same list item because
+			 it lines up with the first line of the first paragraph
+
+blank lines -- over multiple consecutive lines but each line after the
+first must line up exactly with the first character of the first list
+item line to be considered still a part of the list item.
+
+
+Numbered Lists
+
+Numbered lists always begin with a Go integer and a dot (.).
+Conventionally a 1. is used for everything so that document maintainers
+can quickly reorganize when needed without a tool for renumbering. Nested
+lists are not supported. Each item in the list must be on immediate
+subsequent lines. Numbered lists must always be rendered with Arabic
+numerals.
+
+Bulleted Lists
+
+Bulleted lists must begin with a single asterisk (*) followed by
+a single space. No other bullet type from CommonMark is supported.
+Nested lists are not supported. Each item in a list must be on an
+immediately subsequent line. List items may contain any number of spans
+over multiple consecutive lines but each line after the first must line
+up exactly with the first character of the first list item line.
+
+    * This is a
+		  list item
+
+Only Inline Links
+
+Only explicit link URLs are supported. They must always be wrapped with
+angle brackets (<>). Technically inline links are a span of type "under"
+which also gives them an underline emphasis on the terminal.
+
+No Escapes
+
+There is no support for escaping anything in BonzaiMark. (CommonMark allows the placement of a backslash to remove any special meaning.) Therefore, most authors will use verbatim blocks when it is necessary to use the reserved BonzaiMark tokens in other ways.
+
+
+
+Soft and Hard Line Endings
+
+Like CommonMark lines that follow other lines immediately are
+effectively joined together unless there are two or more spaces at the
+end of the line (a hard return). This is after any indentation has been
+removed (see Stripped Indentation).
+
+
+
+of spaces for the first line of indentation.
+Any line beginning with at least four spaces (after trimming
+indentation) will be kept verbatim.
+
+Emphasis will be applied as possible if the following markup is
+detected:
+
+Note that the format of the emphasis might not always be as
+specifically named. For example, most terminals do not support italic
+fonts and so will instead underline *italic* text, so (as specified
+in HTML5 for <i>, for example) these format names should be taken to
+mean their semantic equivalents.
+
+For terminal rendering details see the rwxrob/term package.
+*/
+
+// Mark is the unfinished entry point for rendering BonzaiMark. For now
+// it only splits non-empty input into blocks with Blocks, prints that
+// slice through the standard logger, and always returns an empty
+// string.
+//
+// TODO the earlier draft dedented the markup, wrapped it to 80 columns,
+// and applied Emph before returning it; restore that pipeline on top of
+// Blocks once the block handling is complete.
+func Mark(in string) string {
+	if len(in) > 0 {
+		log.Print(Blocks(in))
+	}
+	return ""
+}
+
+// Blocks strips preceding and trailing white space and then checks the
+// first line for indentation (spaces or tabs) and strips that exact
+// indentation string from every line. It then breaks up the input into
+// blocks separated by one or more empty lines and applies basic
+// formatting to each as follows:
+//
+//     If a block is one of the following it is left alone with no
+//     wrapping:
+//
+//     * Bulleted List - beginning with *
+//     * Numbered List - beginning with 1.
+//     * Verbatim      - beginning with four spaces
+//
+//     Everything else is considered a "paragraph" and will be unwrapped
+//     into a single long line (which is normally wrapped later).
+//
+// For now, these blocks are added as is, but plans are to eventually
+// add support for short and long lists much like CommonMark.
+//
+// Note that because of the nature of Verbatim's block's initial (4
+// space) token Verbatim blocks must never be first since the entire
+// input buffer is first dedented and the spaces would be grouped with
+// the indentation to be stripped. This is never a problem, however,
+// because Verbatim blocks never make sense as the first block in
+// a BonzaiMark document. This simplicity and clarity of 4-space tokens
+// far outweighs the advantages of alternatives (such as fences).
+//
+// The dedenting itself is delegated to to.Dedented and the scanning to
+// a scan.R over the dedented bytes.
+//
+// NOTE(review): every branch below appends its block only after it has
+// seen the block's terminator (a blank line, or for Verbatim a line
+// without the prefix). A block that runs to the end of the buffer
+// without one looks like it is dropped -- confirm whether to.Dedented
+// guarantees trailing blank lines.
+func Blocks(in string) []string {
+
+	var blocks []string
+	// NOTE(review): compiled on every call; could live at package scope.
+	verbpre := regexp.MustCompile(` {4,}`)
+	s := scan.R{Buf: []byte(to.Dedented(in))}
+
+MAIN:
+	for s.Scan() {
+
+		switch s.Rune {
+
+		case '*': // bulleted list
+			// presumably s.Is tests the text that follows the rune just
+			// scanned without consuming it -- verify against rwxrob/scan
+			if s.Is(" ") {
+				// assumes s.Pos is one past the single-byte '*', making m
+				// the index of the bullet itself -- TODO confirm
+				m := s.Pos - 1
+				for s.Scan() {
+					if s.Is("\n\n") {
+						// keep the raw bytes of the list, unwrapped
+						blocks = append(blocks, string(s.Buf[m:s.Pos]))
+						s.Pos += 2 // step over the blank-line separator
+						continue MAIN
+					}
+				}
+			}
+
+		case '1': // numbered list
+			// only a list whose first item is literally "1. " is detected
+			if s.Is(". ") {
+				m := s.Pos - 1
+				for s.Scan() {
+					if s.Is("\n\n") {
+						blocks = append(blocks, string(s.Buf[m:s.Pos]))
+						s.Pos += 2
+						continue MAIN
+					}
+				}
+			}
+
+		case ' ': // verbatim
+			// back up so the space just scanned is part of the match,
+			// then restore the position
+			s.Pos -= 1
+			ln := s.Match(verbpre)
+			s.Pos++
+
+			// fewer than four spaces: not a verbatim prefix, skip the space
+			if ln < 0 {
+				continue
+			}
+			// pre is the exact run of spaces that opened the block; every
+			// following line must repeat it to stay in the block
+			pre := s.Buf[s.Pos-1 : s.Pos+ln-1]
+			s.Pos += len(pre) - 1
+
+			block := []rune{}
+			for s.Scan() {
+
+				if s.Rune == '\n' {
+
+					// add in indented lines
+					if s.Is(string(pre)) {
+						block = append(block, '\n')
+						s.Pos += len(pre)
+						continue
+					}
+
+					// end of the block
+					blocks = append(blocks, string(block))
+					continue MAIN
+				}
+
+				block = append(block, s.Rune)
+			}
+
+		case '\n', '\r', '\t': // inconsequential white space
+			continue
+
+		default: // paragraph
+			block := []rune{s.Rune}
+			for s.Scan() {
+				switch s.Rune {
+				case '\n', '\r':
+					// unwrap: line endings inside a paragraph become spaces
+					block = append(block, ' ')
+				default:
+					block = append(block, s.Rune)
+				}
+				if s.Is("\n\n") {
+					blocks = append(blocks, string(block))
+					// consume the two line returns that ended the paragraph
+					s.Scan()
+					s.Scan()
+					continue MAIN
+				}
+			}
+
+		}
+
+	}
+	return blocks
+}
--- a/z/mark_test.go
+++ b/z/mark_test.go
@ -0,0 +1,196 @@
+package Z_test
+
+import (
+	"fmt"
+
+	Z "github.com/rwxrob/bonzai/z"
+	"github.com/rwxrob/term"
+)
+
+func ExampleEmph_basics() {
+
+	// Emph observes the rwxrob/term escapes
+	// (see package documentation for more)
+
+	// The escapes are package-level variables, so remember the current
+	// values and put them back when the example is done. Otherwise the
+	// placeholders below leak into every example that runs afterwards.
+	italic, bold, boldItalic := term.Italic, term.Bold, term.BoldItalic
+	under, reset := term.Under, term.Reset
+	defer func() {
+		term.Italic, term.Bold, term.BoldItalic = italic, bold, boldItalic
+		term.Under, term.Reset = under, reset
+	}()
+
+	term.Italic = `<italic>`
+	term.Bold = `<bold>`
+	term.BoldItalic = `<bolditalic>`
+	term.Under = `<under>`
+	term.Reset = `<reset>`
+
+	fmt.Println(Z.Emph("*ITALIC*"))
+	fmt.Println(Z.Emph("**BOLD**"))
+	fmt.Println(Z.Emph("***BOLDITALIC***"))
+	fmt.Println(Z.Emph("<UNDER>")) // keeps brackets
+
+	// Output:
+	// <italic>ITALIC<reset>
+	// <bold>BOLD<reset>
+	// <bolditalic>BOLDITALIC<reset>
+	// <<under>UNDER<reset>>
+
+}
+
+func ExampleWrap() {
+	// Capture the current width first; assigning Z.Columns to itself in
+	// the deferred func would restore nothing.
+	orig := Z.Columns
+	defer func() { Z.Columns = orig }()
+	Z.Columns = 10
+	fmt.Println(Z.Wrap(`some thing here that is more than 10 characters`))
+	// Output:
+	// some thing
+	// here that
+	// is more
+	// than 10
+	// characters
+}
+
+func ExampleIndent() {
+	// Capture the current indent first; assigning Z.IndentBy to itself
+	// in the deferred func would restore nothing.
+	orig := Z.IndentBy
+	defer func() { Z.IndentBy = orig }()
+	Z.IndentBy = 4
+	fmt.Printf("%q", Z.Indent("some\nthat is \n  indented"))
+	// Output:
+	// "    some\n    that is \n      indented\n"
+}
+
+func ExampleInWrap() {
+	// Capture both package settings up front so the deferred func puts
+	// back the real originals rather than assigning each to itself.
+	origIndent, origColumns := Z.IndentBy, Z.Columns
+	defer func() { Z.IndentBy, Z.Columns = origIndent, origColumns }()
+	Z.IndentBy = 4
+	Z.Columns = 10
+	fmt.Printf("%q", Z.InWrap("some\nthat is \n  indented"))
+	// Output:
+	// "    some\n    that\n    is\n    indented\n"
+}
+
+func ExampleBlocks_bulleted() {
+	// two bulleted lists separated by a blank line; print the second
+	markup := `
+
+			* some thing
+			* another thing
+
+			* another block
+			* here
+
+			`
+
+	blocks := Z.Blocks(markup)
+	fmt.Println(blocks[1])
+
+	//Output:
+	// * another block
+	// * here
+}
+
+func ExampleBlocks_numbered() {
+	// two numbered lists separated by a blank line; print the second
+	markup := `
+
+			1. some thing
+			2. another thing
+
+			1. another block
+			2. here
+
+			`
+
+	blocks := Z.Blocks(markup)
+	fmt.Println(blocks[1])
+
+	//Output:
+	// 1. another block
+	// 2. here
+}
+
+func ExampleBlocks_paragraph() {
+	// two paragraphs; the second is printed joined onto a single line
+	markup := `
+			Simple paragraph
+			here on multiple
+			lines
+
+			And another one here
+			with just a bit more.
+
+			`
+
+	blocks := Z.Blocks(markup)
+	fmt.Println(blocks[1])
+
+	// Output:
+	// And another one here with just a bit more.
+}
+
+func ExampleBlocks_verbatim() {
+
+	// Every line of the literal starts with the same three tabs so that
+	// dedenting is uniform. The verbatim lines add a run of spaces after
+	// the tabs, and the blank line inside the verbatim block carries
+	// that very same tab-and-space prefix. (In Vi/m, :set list makes the
+	// white space visible.)
+
+	markup := `
+			Must have another block type first.
+
+			     Now we can start
+			     a Verbatim
+			     block.
+			     
+			     Which can have blank lines, even.
+
+			And back to a paragraph block.
+
+			`
+
+	for i := 0; i < 3; i++ {
+		fmt.Printf("%q\n", Z.Blocks(markup)[i])
+	}
+
+	//Output:
+	// "Must have another block type first."
+	// "Now we can start\na Verbatim\nblock.\n\nWhich can have blank lines, even."
+	// "And back to a paragraph block."
+
+}
+
+// Now we can start
+// a Verbatim
+// block.
+//
+// Which can have blank lines, even.
+
+/*
+func ExampleBlocks() {
+	in := `
+
+		 Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
+		 eiusmod tempor incididunt ut labore et dolore magna aliqua.
+
+		 Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris
+		 nisi ut aliquip ex ea commodo consequat.
+
+		 Duis aute irure dolor in reprehenderit in voluptate velit esse
+		 cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat
+		 cupidatat non proident, sunt in culpa qui officia deserunt mollit
+		 anim id est laborum.
+
+		 `
+
+	fmt.Println(Z.Blocks(in))
+
+	// Output:
+	// some thing
+}
+*/
+
+/*
+func ExampleFormat_remove_Initial_Blanks() {
+	fmt.Printf("%q\n", Z.Format("\n   \n\n  \n   some"))
+	// Output:
+	// "some"
+}
+
+func ExampleFormat_wrapping() {
+	fmt.Println(Z.Format(`
+Here is a bunch of stuff just to fill the line beyond 80 columns so that it will wrap when it is supposed to and right now
+as well if there was a hard return in the middle of a line.
+`))
+	// Output:
+	// Here is a bunch of stuff just to fill the line beyond 80 columns so that it will
+	// wrap when it is supposed to and right now
+	// as well if there was a hard return in the middle of a line.
+}
+*/
--- a/z/usage.go
+++ b/z/usage.go
@ -0,0 +1,52 @@
+package Z
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/rwxrob/fn/filt"
+)
+
+// UsageGroup uses Bonzai usage notation, a basic form of regular
+// expressions, to describe the arguments allowed where each argument is
+// a literal string (avoid spaces). The arguments are joined with bars
+// (|) and wrapped with parentheses producing a regex group.  The min
+// and max are then applied by adding the following regex decorations
+// after the final parenthesis:
+//
+//                - min=1 max=1 (exactly one)
+//     ?          - min=0 max=0 (none or many)
+//     +          - min=1 max=0 (one or more)
+//     {min,}     - min>1 max=0 (min, no max)
+//     {min,max}  - min>0 max>0 (min and max)
+//     {,max}     - min=0 max>0 (max, no min)
+//
+// A max of 0 means "no maximum". Any other combination (a negative min
+// or max) adds no decoration.
+//
+// NOTE(review): in conventional regular expressions ? means "none or
+// one" and * means "none or many"; the ? above is kept as is because
+// callers may already depend on it.
+//
+// An empty args slice returns an empty string. If only one arg, then
+// that arg is simply returned and min and max are ignored. Arguments
+// that are empty strings are ignored. No transformation is done to the
+// string itself (such as removing white space).
+func UsageGroup(args []string, min, max int) string {
+	args = filt.NotEmpty(args)
+	switch len(args) {
+	case 0:
+		return ""
+	case 1:
+		return args[0]
+	}
+
+	// order matters: the exact matches must be tested before the ranges
+	var dec string
+	switch {
+	case min == 1 && max == 1:
+		// exactly one: the bare group says it all
+	case min == 0 && max == 0:
+		dec = "?"
+	case min == 1 && max == 0:
+		dec = "+"
+	case min > 1 && max == 0:
+		dec = fmt.Sprintf("{%v,}", min)
+	case min > 0 && max > 0:
+		dec = fmt.Sprintf("{%v,%v}", min, max)
+	case min == 0 && max > 0:
+		// includes max=1, which must not look like "exactly one"
+		dec = fmt.Sprintf("{,%v}", max)
+	}
+	return "(" + strings.Join(args, "|") + ")" + dec
+}