diff --git a/z/esc.go b/z/esc.go new file mode 100644 index 0000000..35061b2 --- /dev/null +++ b/z/esc.go @@ -0,0 +1,26 @@ +package Z + +import "github.com/rwxrob/fn" + +// EscThese is set to the default UNIX shell characters which require +// escaping to be used safely on the terminal. It can be changed to suit +// the needs of different host shell environments. +var EscThese = " \r\t\n|&;()<>![]" + +// Esc returns a shell-escaped version of the string s. The returned value +// is a string that can safely be used as one token in a shell command line. +func Esc(s string) string { + var buf []rune + for _, r := range s { + for _, esc := range EscThese { + if r == esc { + buf = append(buf, '\\') + } + } + buf = append(buf, r) + } + return string(buf) +} + +// EscAll calls Esc on all passed strings. +func EscAll(args []string) []string { return fn.Map(args, Esc) } diff --git a/z/mark.go b/z/mark.go new file mode 100644 index 0000000..76c861b --- /dev/null +++ b/z/mark.go @@ -0,0 +1,417 @@ +package Z + +import ( + "log" + "regexp" + "unicode" + + "github.com/rwxrob/scan" + "github.com/rwxrob/term" + "github.com/rwxrob/to" +) + +// This file contains BonzaiMark. + +// IndentBy is the number of spaces to indent in Indent. Default is 7. +// Bonzai command tree creator can change this for every composite +// command imported their application in this one place. +var IndentBy = 7 + +// Columns is the number of bytes (not runes) at which Wrap will wrap. +// Default is 80. Bonzai command tree creator can change this for every +// composite command imported their application in this one place. +var Columns = 80 + +// Emph renders BonzaiMark emphasis spans specifically for +// VT100-compatible terminals (which almost all are today): +// +// *Italic* +// **Bold** +// ***BoldItalic*** +// (keeping brackets) +// +// See Mark for block formatting and rwxrob/term for terminal rendering. +func Emph(buf string) string { + var nbuf []rune + var opentok, closetok bool + var otok, ctok string + prev := ' ' + + for i := 0; i < len([]rune(buf)); i++ { + r := []rune(buf)[i] + + if r == '<' { + nbuf = append(nbuf, '<') + nbuf = append(nbuf, []rune(term.Under)...) + for { + i++ + r = rune(buf[i]) + if r == '>' { + i++ + break + } + nbuf = append(nbuf, r) + } + nbuf = append(nbuf, []rune(term.Reset)...) + nbuf = append(nbuf, '>') + i-- + continue + } + + if r != '*' { + + if opentok { + tokval := " " + if !unicode.IsSpace(r) { + switch otok { + case "*": + tokval = term.Italic + case "**": + tokval = term.Bold + case "***": + tokval = term.BoldItalic + } + } else { + tokval = otok + } + nbuf = append(nbuf, []rune(tokval)...) + opentok = false + otok = "" + } + + if closetok { + nbuf = append(nbuf, []rune(term.Reset)...) // practical, not perfect + ctok = "" + closetok = false + } + + prev = r + nbuf = append(nbuf, r) + continue + } + + // everything else for '*' + if unicode.IsSpace(prev) || opentok { + opentok = true + otok += string(r) + continue + } + + // only closer conditions remain + if !unicode.IsSpace(prev) { + closetok = true + ctok += string(r) + continue + } + + // nothing special + closetok = false + nbuf = append(nbuf, r) + } + + // for tokens at the end of a block + if closetok { + nbuf = append(nbuf, []rune(term.Reset)...) + } + + return string(nbuf) +} + +// Wrap wraps to Columns width. +func Wrap(in string) string { w, _ := to.Wrapped(in, Columns); return w } + +// Indent indents the number of spaces set by IndentBy. +func Indent(in string) string { return to.Indented(in, IndentBy) } + +// InWrap combines both Wrap and Indent. +func InWrap(in string) string { + w, _ := to.Wrapped(in, Columns-IndentBy) + return to.Indented(w, IndentBy) +} + +// ---------------------------- finish this --------------------------- + +// TODO finish the full Mark implementation and documentation + +/* +Mark renders BonzaiMark markup as formatted VT100-compatible terminal +output, wrapped and indented as specified by the package variables +IndentBy and Columns. + +BonzaiMark + +BonzaiMark is a minimal subset of CommonMark (Markdown) suitable for +rendering to terminals, web pages, PDF, or plain text. The syntax is +deliberately simple and easy to read (much like standard GoDoc) but rich +enough to look well rendered in documents as well as the terminal. + +Stripped Indentation + +Unlike CommonMark, BonzaiMark ignores any number of blank lines or +whitespace before the first line and uses the initial characters +preceding the first line (tabs or spaces) as a basis for what to strip +from every subsequent line of the document. All trailing white space and +blank lines are also discarded. This allows BonzaiMark to be included in +source code in very readable ways (preferably with backtick string +literals). + + some := ` + Here is a paragraph + that will have the initial spaces + stripped. + ` + +Structure: Blocks of Spans + +Like CommonMark, every BonzaiMark document consists of one or more +blocks which contain one or more spans of the following type: + + plain + *italic* + **bold** + ***bolditalic*** + (brackets remain) + +Unlike CommonMark, spans may not contain any other span type. + +While the underline format is not supported in CommonMark, underlining is frequently used in place of italic for most terminals. Angle brackets are, however, supported in CommonMark. + +The following limited block types are supported. (All others from CommonMark are not): + + * Paragraph + * Verbatim + * Numbered List + * Bulleted List + * Numbered Long List Item + * Bulleted Long List Item + +Unlike CommonMark, all blocks must be separated by two or more line returns. + +Paragraph Blocks + +Paragraph blocks are the most common. They consist of one or more of spans. + +Verbatim Blocks + +A Verbatim block will be included exactly as typed. It begins with the +first line that has four or more spaces followed by a non-whitespace +character. The block continues until the next block is detected. (All +other blocks must begin on the first column (after stripped +indentation). + +Lists + +Like CommonMark there are long lists and compact lists. Long lists are +actually multiple consecutive long list item blocks whereas compact +lists consist of list items that are on one line after another (no +double-return block separator). + +Lists are either numbered or bulleted and never exceed one level (no +nesting). Both list types may contain any number of paragraph blocks but +most will only contain the one. When multiple paragraph blocks are +wanted the subsequent paragraph block must begin on the same column as +the first character of the first line of the first list item paragraph; +they must line up. + + 1. **Keep list item paragraphs lined up** + + This is a second paragraph block under the same list item because + it lines up with the first line of the first paragraph + +blank lines -- over multiple consecutive lines but each line after the +first must line up exactly with the first character of the first list +item line to be considered still a part of the list item. + + +Numbered Lists + +Numbered lists always begin with a go integer and a dot (.). +Conventionally a 1. is used for everything so that document maintainers +can quickly reorganize when needed without a tool for renumbered. Nested +lists are not supported. Each item in the list must be on immediate +subsequent lines. Numbered lists must always be rendered with Arabic +numerals. + +Bulleted Lists + +Bulleted lists must begin with a single asterisk (*) followed by +a single space. No other bullet type from CommonMark is supported. +Nested lists are not supported. Each item in a list must be on an +immediately subsequent line. List items may contain any number of spans +over multiple consecutive lines but each line after the first must line +up exactly with the first character of the first list item line. + + * This is a + list item + +Only Inline Links + +Only explicit link URLs are supported. The must always be wrapped with +angle brackets (<>). Technically inline links are a span of type "under" +which also gives them an underline emphasis on the terminal. + +No Escapes + +There is no support for escaping anything in BonzaiMark. (CommonMark allows the placement of a backslash to remove any special meaning.) Therefore, most authors will use verbatim blocks when it is necessary to use the reserved BonzaiMark tokens in other ways. + + + +Soft and Hard Line Endings + +Like CommonMark lines that follow other lines immediately are +effectively joined together unless there are two or more spaces at the +end of the line (a hard return). This is after any indentation has been +removed (see Stripped Indentation). + + + +of spaces for the first line of indentation. +Any line beginning with at least four spaces (after trimming +indentation) will be kept verbatim. + +Emphasis will be applied as possible if the following markup is +detected: + +Note that the format of the emphasis might not always be as +specifically named. For example, most terminal do not support italic +fonts and so will instead underline *italic* text, so (as specified +in HTML5 for , for example) these format names should be taken to +mean their semantic equivalents. + +For terminal rendering details see the rwxrob/term package. +*/ + +// Mark + +func Mark(in string) string { + if in == "" { + return "" + } + + //var out string + blocks := Blocks(in) + log.Print(blocks) + + //out := to.Dedented(markup) + //out, _ = to.Wrapped(out, 80) + //out = Emph(out) + //return out + return "" +} + +// Blocks strips preceding and trailing white space and then checks the +// first line for indentation (spaces or tabs) and strips that exact +// indentation string from every line. It then breaks up the input into +// blocks separated by one or more empty lines and applies basic +// formatting to each as follows: +// +// If is one of the following leave alone with no wrapping: +// +// * Bulleted List - beginning with * +// * Numbered List - beginning with 1. +// * Verbatim - beginning with four spaces +// +// Everything else is considered a "paragraph" and will be unwrapped +// into a single long line (which is normally wrapped later). +// +// For now, these blocks are added as is, but plans are to eventually +// add support for short and long lists much like CommonMark. +// +// Note that because of the nature of Verbatim's block's initial (4 +// space) token Verbatim blocks must never be first since the entire +// input buffer is first dedented and the spaces would grouped with the +// indentation to be stripped. This is never a problem, however, +// because Verbatim blocks never make sense as the first block in +// a BonzaiMark document. This simplicity and clarity of 4-space tokens +// far outweighs the advantages of alternatives (such as fences). +func Blocks(in string) []string { + + var blocks []string + verbpre := regexp.MustCompile(` {4,}`) + s := scan.R{Buf: []byte(to.Dedented(in))} + +MAIN: + for s.Scan() { + + switch s.Rune { + + case '*': // bulleted list + if s.Is(" ") { + m := s.Pos - 1 + for s.Scan() { + if s.Is("\n\n") { + blocks = append(blocks, string(s.Buf[m:s.Pos])) + s.Pos += 2 + continue MAIN + } + } + } + + case '1': // numbered list + if s.Is(". ") { + m := s.Pos - 1 + for s.Scan() { + if s.Is("\n\n") { + blocks = append(blocks, string(s.Buf[m:s.Pos])) + s.Pos += 2 + continue MAIN + } + } + } + + case ' ': // verbatim + s.Pos -= 1 + ln := s.Match(verbpre) + s.Pos++ + + if ln < 0 { + continue + } + pre := s.Buf[s.Pos-1 : s.Pos+ln-1] + s.Pos += len(pre) - 1 + + block := []rune{} + for s.Scan() { + + if s.Rune == '\n' { + + // add in indented lines + if s.Is(string(pre)) { + block = append(block, '\n') + s.Pos += len(pre) + continue + } + + // end of the block + blocks = append(blocks, string(block)) + continue MAIN + } + + block = append(block, s.Rune) + } + + case '\n', '\r', '\t': // inconsequential white space + continue + + default: // paragraph + block := []rune{s.Rune} + for s.Scan() { + switch s.Rune { + case '\n', '\r': + block = append(block, ' ') + default: + block = append(block, s.Rune) + } + if s.Is("\n\n") { + blocks = append(blocks, string(block)) + s.Scan() + s.Scan() + continue MAIN + } + } + + } + + } + return blocks +} diff --git a/z/mark_test.go b/z/mark_test.go new file mode 100644 index 0000000..de5ef6e --- /dev/null +++ b/z/mark_test.go @@ -0,0 +1,196 @@ +package Z_test + +import ( + "fmt" + + Z "github.com/rwxrob/bonzai/z" + "github.com/rwxrob/term" +) + +func ExampleEmph_basics() { + + // Emph observes the rwxrob/term escapes + // (see package documentation for more) + + term.Italic = `` + term.Bold = `` + term.BoldItalic = `` + term.Under = `` + term.Reset = `` + + fmt.Println(Z.Emph("*ITALIC*")) + fmt.Println(Z.Emph("**BOLD**")) + fmt.Println(Z.Emph("***BOLDITALIC***")) + fmt.Println(Z.Emph("")) // keeps brackets + + // Output: + // ITALIC + // BOLD + // BOLDITALIC + // <UNDER> + +} + +func ExampleWrap() { + defer func() { Z.Columns = Z.Columns }() + Z.Columns = 10 + fmt.Println(Z.Wrap(`some thing here that is more than 10 characters`)) + // Output: + // some thing + // here that + // is more + // than 10 + // characters +} + +func ExampleIndent() { + defer func() { Z.IndentBy = Z.IndentBy }() + Z.IndentBy = 4 + fmt.Printf("%q", Z.Indent("some\nthat is \n indented")) + // Output: + // " some\n that is \n indented\n" +} + +func ExampleInWrap() { + defer func() { Z.IndentBy = Z.IndentBy }() + defer func() { Z.Columns = Z.Columns }() + Z.IndentBy = 4 + Z.Columns = 10 + fmt.Printf("%q", Z.InWrap("some\nthat is \n indented")) + // Output: + // " some\n that\n is\n indented\n" +} + +func ExampleBlocks_bulleted() { + in := ` + + * some thing + * another thing + + * another block + * here + + ` + + fmt.Println(Z.Blocks(in)[1]) + + //Output: + // * another block + // * here +} + +func ExampleBlocks_numbered() { + in := ` + + 1. some thing + 2. another thing + + 1. another block + 2. here + + ` + + fmt.Println(Z.Blocks(in)[1]) + + //Output: + // 1. another block + // 2. here +} + +func ExampleBlocks_paragraph() { + in := ` + Simple paragraph + here on multiple + lines + + And another one here + with just a bit more. + + ` + + fmt.Println(Z.Blocks(in)[1]) + + // Output: + // And another one here with just a bit more. +} + +func ExampleBlocks_verbatim() { + + // Note that the following begins consistently with three tabs so that + // dedenting works consistently. There are four spaces before Now and + // the verbatim block. Notice that even the blank line within the + // verbatim block must have the exact same indentation and spaced + // verbatim prefix. (If using Vi/m try set :list to display them.) + + in := ` + Must have another block type first. + + Now we can start + a Verbatim + block. + + Which can have blank lines, even. + + And back to a paragraph block. + + ` + + fmt.Printf("%q\n", Z.Blocks(in)[0]) + fmt.Printf("%q\n", Z.Blocks(in)[1]) + fmt.Printf("%q\n", Z.Blocks(in)[2]) + + //Output: + // "Must have another block type first." + // "Now we can start\na Verbatim\nblock.\n\nWhich can have blank lines, even." + // "And back to a paragraph block." + +} + +// Now we can start +// a Verbatim +// block. +// +// Which can have blank lines, even. + +/* +func ExampleBlocks() { + in := ` + + Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do + eiusmod tempor incididunt ut labore et dolore magna aliqua. + + Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris + nisi ut aliquip ex ea commodo consequat. + + Duis aute irure dolor in reprehenderit in voluptate velit esse + cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat + cupidatat non proident, sunt in culpa qui officia deserunt mollit + anim id est laborum. + + ` + + fmt.Println(Z.Blocks(in)) + + // Output: + // some thing +} +*/ + +/* +func ExampleFormat_remove_Initial_Blanks() { + fmt.Printf("%q\n", Z.Format("\n \n\n \n some")) + // Output: + // "some" +} + +func ExampleFormat_wrapping() { + fmt.Println(Z.Format(` +Here is a bunch of stuff just to fill the line beyond 80 columns so that it will wrap when it is supposed to and right now +as well if there was a hard return in the middle of a line. +`)) + // Output: + // Here is a bunch of stuff just to fill the line beyond 80 columns so that it will + // wrap when it is supposed to and right now + // as well if there was a hard return in the middle of a line. +} +*/ diff --git a/z/usage.go b/z/usage.go new file mode 100644 index 0000000..2f2b314 --- /dev/null +++ b/z/usage.go @@ -0,0 +1,52 @@ +package Z + +import ( + "fmt" + "strings" + + "github.com/rwxrob/fn/filt" +) + +// UsageGroup uses Bonzai usage notation, a basic form of regular +// expressions, to describe the arguments allowed where each argument is +// a literal string (avoid spaces). The arguments are joined with bars +// (|) and wrapped with parentheses producing a regex group. The min +// and max are then applied by adding the following regex decorations +// after the final parenthesis: +// +// - min=1 max=1 (exactly one) +// ? - min=0 max=0 (none or many) +// + - min=1 max=0 (one or more) +// {min,} - min>0 max=0 (min, no max) +// {min,max} - min>0 max>0 (min and max) +// {,max} - min=0 max>0 (max, no min) +// +// An empty args slice returns an empty string. If only one arg, then +// that arg is simply returned and min and max are ignored. Arguments +// that are empty strings are ignored. No transformation is done to the +// string itself (such as removing white space). +func UsageGroup(args []string, min, max int) string { + args = filt.NotEmpty(args) + switch len(args) { + case 0: + return "" + case 1: + return args[0] + default: + var dec string + switch { + case min == 1 && max == 1: + case min == 0 && max == 0: + dec = "?" + case min == 1 && max == 0: + dec = "+" + case min > 1 && max == 0: + dec = fmt.Sprintf("{%v,}", min) + case min > 0 && max > 0: + dec = fmt.Sprintf("{%v,%v}", min, max) + case min == 0 && max > 1: + dec = fmt.Sprintf("{,%v}", max) + } + return "(" + strings.Join(args, "|") + ")" + dec + } +}