Blocks working

pull/66/head
rwxrob 2 years ago
parent dcbcc5d34f
commit 98ac4f744a
No known key found for this signature in database
GPG Key ID: 2B9111F33082AE77

@ -0,0 +1,26 @@
package Z
import "github.com/rwxrob/fn"
// EscThese lists the characters that Esc prefixes with a backslash. It
// defaults to a set of UNIX shell characters that need escaping on the
// terminal and may be reassigned to suit a different host shell.
//
// NOTE(review): the default set does not contain quotes, '$', backtick,
// or the backslash itself -- confirm that is intended for the target shell.
var EscThese = " \r\t\n|&;()<>![]"

// Esc returns a copy of s in which every rune that appears in EscThese is
// preceded by a backslash, so that the result can be used as one token
// in a shell command line. All other runes are copied unchanged.
func Esc(s string) string {
	escaped := make([]rune, 0, len(s))
	for _, ch := range s {
		// one backslash is written for each occurrence of ch in EscThese
		for _, special := range EscThese {
			if ch != special {
				continue
			}
			escaped = append(escaped, '\\')
		}
		escaped = append(escaped, ch)
	}
	return string(escaped)
}
// EscAll applies Esc to every string in args (via fn.Map) and returns
// the escaped strings.
func EscAll(args []string) []string { return fn.Map(args, Esc) }

@ -0,0 +1,417 @@
package Z
import (
"log"
"regexp"
"unicode"
"github.com/rwxrob/scan"
"github.com/rwxrob/term"
"github.com/rwxrob/to"
)
// This file contains BonzaiMark.
// IndentBy is the number of spaces to indent in Indent and InWrap.
// Default is 7. A Bonzai command tree creator can change this in this
// one place for every composite command imported into their application.
var IndentBy = 7
// Columns is the number of bytes (not runes) at which Wrap will wrap.
// Default is 80. A Bonzai command tree creator can change this in this
// one place for every composite command imported into their application.
var Columns = 80
// Emph renders BonzaiMark emphasis spans specifically for
// VT100-compatible terminals (which almost all are today):
//
//	*Italic*
//	**Bold**
//	***BoldItalic***
//	<under> (keeping brackets)
//
// A run of asterisks that starts the buffer or follows white space is an
// opening token. When it is followed by a non-space rune it is replaced
// with term.Italic, term.Bold, or term.BoldItalic (one, two, or three
// asterisks); when it is followed by white space, or ends the buffer, the
// asterisks are kept literally. A run of asterisks that follows
// a non-space rune is a closing token and is replaced with term.Reset.
//
// Text between '<' and the next '>' is wrapped in term.Under and
// term.Reset with the brackets kept. A '<' that has no closing '>' is
// treated as ordinary text.
//
// See Mark for block formatting and rwxrob/term for terminal rendering.
func Emph(buf string) string {
	// Decode once so that every index below is a rune index; indexing
	// the string directly would yield bytes and break multi-byte text.
	runes := []rune(buf)

	var nbuf []rune
	var opentok, closetok bool
	var otok string
	prev := ' '

	for i := 0; i < len(runes); i++ {
		r := runes[i]

		if r == '<' {
			// look ahead for the closing bracket without running off
			// the end of the input
			end := -1
			for j := i + 1; j < len(runes); j++ {
				if runes[j] == '>' {
					end = j
					break
				}
			}
			if end >= 0 {
				nbuf = append(nbuf, '<')
				nbuf = append(nbuf, []rune(term.Under)...)
				nbuf = append(nbuf, runes[i+1:end]...)
				nbuf = append(nbuf, []rune(term.Reset)...)
				nbuf = append(nbuf, '>')
				i = end // the loop increment steps past '>'
				continue
			}
			// unterminated: fall through and treat '<' as plain text
		}

		if r != '*' {
			if opentok {
				// an opener of an unsupported length (four or more
				// asterisks) is rendered as a single space
				tokval := " "
				if unicode.IsSpace(r) {
					// not emphasis after all, keep the asterisks
					tokval = otok
				} else {
					switch otok {
					case "*":
						tokval = term.Italic
					case "**":
						tokval = term.Bold
					case "***":
						tokval = term.BoldItalic
					}
				}
				nbuf = append(nbuf, []rune(tokval)...)
				opentok = false
				otok = ""
			}
			if closetok {
				nbuf = append(nbuf, []rune(term.Reset)...) // practical, not perfect
				closetok = false
			}
			prev = r
			nbuf = append(nbuf, r)
			continue
		}

		// everything else is for '*'
		if unicode.IsSpace(prev) || opentok {
			opentok = true
			otok += string(r)
			continue
		}

		// prev is not white space, so this can only be a closer
		closetok = true
	}

	// an opener at the very end has nothing to emphasize, keep it as typed
	if opentok {
		nbuf = append(nbuf, []rune(otok)...)
	}

	// for closing tokens at the end of a block
	if closetok {
		nbuf = append(nbuf, []rune(term.Reset)...)
	}

	return string(nbuf)
}
// Wrap wraps in to Columns width using to.Wrapped and returns the
// wrapped string. The second value returned by to.Wrapped is discarded.
func Wrap(in string) string { w, _ := to.Wrapped(in, Columns); return w }
// Indent indents in by the number of spaces set by IndentBy using
// to.Indented and returns the result.
func Indent(in string) string { return to.Indented(in, IndentBy) }
// InWrap combines both Wrap and Indent: in is wrapped to Columns minus
// IndentBy and the wrapped text is then indented by IndentBy.
func InWrap(in string) string {
	// wrap narrower by IndentBy to leave room for the indentation added below
	w, _ := to.Wrapped(in, Columns-IndentBy)
	return to.Indented(w, IndentBy)
}
// ---------------------------- finish this ---------------------------
// TODO finish the full Mark implementation and documentation
/*
Mark renders BonzaiMark markup as formatted VT100-compatible terminal
output, wrapped and indented as specified by the package variables
IndentBy and Columns.
BonzaiMark
BonzaiMark is a minimal subset of CommonMark (Markdown) suitable for
rendering to terminals, web pages, PDF, or plain text. The syntax is
deliberately simple and easy to read (much like standard GoDoc) but rich
enough to look well rendered in documents as well as the terminal.
Stripped Indentation
Unlike CommonMark, BonzaiMark ignores any number of blank lines or
whitespace before the first line and uses the initial characters
preceding the first line (tabs or spaces) as a basis for what to strip
from every subsequent line of the document. All trailing white space and
blank lines are also discarded. This allows BonzaiMark to be included in
source code in very readable ways (preferably with backtick string
literals).
some := `
Here is a paragraph
that will have the initial spaces
stripped.
`
Structure: Blocks of Spans
Like CommonMark, every BonzaiMark document consists of one or more
blocks which contain one or more spans of the following type:
plain
*italic*
**bold**
***bolditalic***
<under> (brackets remain)
Unlike CommonMark, spans may not contain any other span type.
While the underline format is not supported in CommonMark, underlining is frequently used in place of italic for most terminals. Angle brackets are, however, supported in CommonMark.
The following limited block types are supported. (All others from CommonMark are not):
* Paragraph
* Verbatim
* Numbered List
* Bulleted List
* Numbered Long List Item
* Bulleted Long List Item
Unlike CommonMark, all blocks must be separated by two or more line returns.
Paragraph Blocks
Paragraph blocks are the most common. They consist of one or more spans.
Verbatim Blocks
A Verbatim block will be included exactly as typed. It begins with the
first line that has four or more spaces followed by a non-whitespace
character. The block continues until the next block is detected. (All
other blocks must begin on the first column after stripped
indentation.)
Lists
Like CommonMark there are long lists and compact lists. Long lists are
actually multiple consecutive long list item blocks whereas compact
lists consist of list items that are on one line after another (no
double-return block separator).
Lists are either numbered or bulleted and never exceed one level (no
nesting). Both list types may contain any number of paragraph blocks but
most will only contain the one. When multiple paragraph blocks are
wanted the subsequent paragraph block must begin on the same column as
the first character of the first line of the first list item paragraph;
they must line up.
1. **Keep list item paragraphs lined up**
This is a second paragraph block under the same list item because
it lines up with the first line of the first paragraph
blank lines -- over multiple consecutive lines but each line after the
first must line up exactly with the first character of the first list
item line to be considered still a part of the list item.
Numbered Lists
Numbered lists always begin with a go integer and a dot (.).
Conventionally a 1. is used for everything so that document maintainers
can quickly reorganize when needed without a tool for renumbering. Nested
lists are not supported. Each item in the list must be on immediate
subsequent lines. Numbered lists must always be rendered with Arabic
numerals.
Bulleted Lists
Bulleted lists must begin with a single asterisk (*) followed by
a single space. No other bullet type from CommonMark is supported.
Nested lists are not supported. Each item in a list must be on an
immediately subsequent line. List items may contain any number of spans
over multiple consecutive lines but each line after the first must line
up exactly with the first character of the first list item line.
* This is a
list item
Only Inline Links
Only explicit link URLs are supported. They must always be wrapped with
angle brackets (<>). Technically inline links are a span of type "under"
which also gives them an underline emphasis on the terminal.
No Escapes
There is no support for escaping anything in BonzaiMark. (CommonMark allows the placement of a backslash to remove any special meaning.) Therefore, most authors will use verbatim blocks when it is necessary to use the reserved BonzaiMark tokens in other ways.
Soft and Hard Line Endings
Like CommonMark lines that follow other lines immediately are
effectively joined together unless there are two or more spaces at the
end of the line (a hard return). This is after any indentation has been
removed (see Stripped Indentation).
of spaces for the first line of indentation.
Any line beginning with at least four spaces (after trimming
indentation) will be kept verbatim.
Emphasis will be applied as possible if the following markup is
detected:
Note that the format of the emphasis might not always be as
specifically named. For example, most terminals do not support italic
fonts and so will instead underline *italic* text, so (as specified
in HTML5 for <i>, for example) these format names should be taken to
mean their semantic equivalents.
For terminal rendering details see the rwxrob/term package.
*/
// Mark is intended to render BonzaiMark markup for the terminal but is
// not finished yet. It currently returns an empty string for every
// input. For non-empty input it splits in with Blocks and prints the
// resulting slice with log.Print as a development aid.
func Mark(in string) string {
	if in == "" {
		return ""
	}
	//var out string
	blocks := Blocks(in)
	// temporary: show the detected blocks until rendering is implemented
	log.Print(blocks)
	//out := to.Dedented(markup)
	//out, _ = to.Wrapped(out, 80)
	//out = Emph(out)
	//return out
	return ""
}
// Blocks strips preceding and trailing white space and then checks the
// first line for indentation (spaces or tabs) and strips that exact
// indentation string from every line. It then breaks up the input into
// blocks separated by one or more empty lines and applies basic
// formatting to each as follows:
//
// If a block is one of the following it is left alone with no wrapping:
//
// * Bulleted List - beginning with *
// * Numbered List - beginning with 1.
// * Verbatim - beginning with four spaces
//
// Everything else is considered a "paragraph" and will be unwrapped
// into a single long line (which is normally wrapped later).
//
// For now, these blocks are added as is, but plans are to eventually
// add support for short and long lists much like CommonMark.
//
// Note that because of the nature of the Verbatim block's initial (4
// space) token Verbatim blocks must never be first since the entire
// input buffer is first dedented and the spaces would be grouped with
// the indentation to be stripped. This is never a problem, however,
// because Verbatim blocks never make sense as the first block in
// a BonzaiMark document. The simplicity and clarity of 4-space tokens
// far outweighs the advantages of alternatives (such as fences).
//
// NOTE(review): every branch below appends a block only when its
// terminator is found while scanning. A final block that reaches the end
// of the buffer without that terminator appears never to be appended --
// confirm against what to.Dedented leaves at the end of the buffer.
func Blocks(in string) []string {
	var blocks []string
	// verbatim prefix: a run of four or more spaces (compiled on every call)
	verbpre := regexp.MustCompile(` {4,}`)
	// the dedenting itself is delegated to to.Dedented
	s := scan.R{Buf: []byte(to.Dedented(in))}
MAIN:
	for s.Scan() {
		switch s.Rune {
		case '*': // bulleted list
			// presumably s.Is tests the text following the scanned rune,
			// and s.Pos-1 is the index of the '*' -- TODO confirm in rwxrob/scan
			// NOTE(review): when the test fails the '*' is not added to any
			// block, so a paragraph that starts with '*' looks like it loses it
			if s.Is(" ") {
				m := s.Pos - 1
				for s.Scan() {
					if s.Is("\n\n") {
						// keep the raw bytes of the list, then step over
						// the two line returns
						blocks = append(blocks, string(s.Buf[m:s.Pos]))
						s.Pos += 2
						continue MAIN
					}
				}
			}
		case '1': // numbered list
			// only a block whose first rune is '1' followed by ". " is
			// detected here; same handling as the bulleted list above
			if s.Is(". ") {
				m := s.Pos - 1
				for s.Scan() {
					if s.Is("\n\n") {
						blocks = append(blocks, string(s.Buf[m:s.Pos]))
						s.Pos += 2
						continue MAIN
					}
				}
			}
		case ' ': // verbatim
			// step back so the match includes the space just scanned,
			// then restore the position
			s.Pos -= 1
			ln := s.Match(verbpre)
			s.Pos++
			if ln < 0 {
				// fewer than four spaces: not a verbatim prefix
				// (assumes Match returns a negative value on no match -- TODO confirm)
				continue
			}
			// pre is the exact run of spaces that must start every
			// following line of this verbatim block
			pre := s.Buf[s.Pos-1 : s.Pos+ln-1]
			s.Pos += len(pre) - 1
			block := []rune{}
			for s.Scan() {
				if s.Rune == '\n' {
					// add in indented lines
					if s.Is(string(pre)) {
						block = append(block, '\n')
						s.Pos += len(pre)
						continue
					}
					// end of the block
					blocks = append(blocks, string(block))
					continue MAIN
				}
				block = append(block, s.Rune)
			}
		case '\n', '\r', '\t': // inconsequential white space
			continue
		default: // paragraph
			block := []rune{s.Rune}
			for s.Scan() {
				switch s.Rune {
				case '\n', '\r':
					// unwrap: a line return inside a paragraph becomes a space
					block = append(block, ' ')
				default:
					block = append(block, s.Rune)
				}
				if s.Is("\n\n") {
					blocks = append(blocks, string(block))
					// consume the two line returns that end the paragraph
					s.Scan()
					s.Scan()
					continue MAIN
				}
			}
		}
	}
	return blocks
}

@ -0,0 +1,196 @@
package Z_test
import (
"fmt"
Z "github.com/rwxrob/bonzai/z"
"github.com/rwxrob/term"
)
// ExampleEmph_basics swaps the rwxrob/term escape variables for readable
// placeholder strings and prints how Emph renders each span type. The
// term variables are not restored afterwards.
func ExampleEmph_basics() {
	// Emph observes the rwxrob/term escapes
	// (see package documentation for more)
	term.Italic = `<italic>`
	term.Bold = `<bold>`
	term.BoldItalic = `<bolditalic>`
	term.Under = `<under>`
	term.Reset = `<reset>`
	fmt.Println(Z.Emph("*ITALIC*"))
	fmt.Println(Z.Emph("**BOLD**"))
	fmt.Println(Z.Emph("***BOLDITALIC***"))
	fmt.Println(Z.Emph("<UNDER>")) // keeps brackets
	// Output:
	// <italic>ITALIC<reset>
	// <bold>BOLD<reset>
	// <bolditalic>BOLDITALIC<reset>
	// <<under>UNDER<reset>>
}
// ExampleWrap temporarily narrows Z.Columns to 10 and prints the wrapped
// text. The previous value of Z.Columns is restored on return.
func ExampleWrap() {
	// the argument is evaluated now, so the deferred call puts back the
	// value Z.Columns had before this example changed it
	defer func(cols int) { Z.Columns = cols }(Z.Columns)
	Z.Columns = 10
	fmt.Println(Z.Wrap(`some thing here that is more than 10 characters`))
	// Output:
	// some thing
	// here that
	// is more
	// than 10
	// characters
}
// ExampleIndent temporarily sets Z.IndentBy to 4 and prints the indented
// text quoted. The previous value of Z.IndentBy is restored on return.
func ExampleIndent() {
	// the argument is evaluated now, so the deferred call puts back the
	// value Z.IndentBy had before this example changed it
	defer func(by int) { Z.IndentBy = by }(Z.IndentBy)
	Z.IndentBy = 4
	fmt.Printf("%q", Z.Indent("some\nthat is \n indented"))
	// Output:
	// " some\n that is \n indented\n"
}
// ExampleInWrap temporarily sets Z.IndentBy to 4 and Z.Columns to 10 and
// prints the wrapped and indented text quoted. Both package variables
// are restored to their previous values on return.
func ExampleInWrap() {
	// the arguments are evaluated now, so the deferred calls put back the
	// values the variables had before this example changed them
	defer func(by int) { Z.IndentBy = by }(Z.IndentBy)
	defer func(cols int) { Z.Columns = cols }(Z.Columns)
	Z.IndentBy = 4
	Z.Columns = 10
	fmt.Printf("%q", Z.InWrap("some\nthat is \n indented"))
	// Output:
	// " some\n that\n is\n indented\n"
}
// ExampleBlocks_bulleted passes bulleted list input to Blocks and prints
// the second block returned.
func ExampleBlocks_bulleted() {
	in := `
* some thing
* another thing
* another block
* here
`
	// index 1 is the second block
	fmt.Println(Z.Blocks(in)[1])
	//Output:
	// * another block
	// * here
}
// ExampleBlocks_numbered passes numbered list input to Blocks and prints
// the second block returned.
func ExampleBlocks_numbered() {
	in := `
1. some thing
2. another thing
1. another block
2. here
`
	// index 1 is the second block
	fmt.Println(Z.Blocks(in)[1])
	//Output:
	// 1. another block
	// 2. here
}
// ExampleBlocks_paragraph passes multi-line paragraph input to Blocks and
// prints the second block returned, which is joined onto a single line.
func ExampleBlocks_paragraph() {
	in := `
Simple paragraph
here on multiple
lines
And another one here
with just a bit more.
`
	// index 1 is the second block
	fmt.Println(Z.Blocks(in)[1])
	// Output:
	// And another one here with just a bit more.
}
// ExampleBlocks_verbatim passes a paragraph, a verbatim block, and
// another paragraph to Blocks and prints the first three blocks quoted.
func ExampleBlocks_verbatim() {
	// Note that the following begins consistently with three tabs so that
	// dedenting works consistently. There are four spaces before Now and
	// the verbatim block. Notice that even the blank line within the
	// verbatim block must have the exact same indentation and spaced
	// verbatim prefix. (If using Vi/m try :set list to display them.)
	in := `
Must have another block type first.
Now we can start
a Verbatim
block.
Which can have blank lines, even.
And back to a paragraph block.
`
	fmt.Printf("%q\n", Z.Blocks(in)[0])
	fmt.Printf("%q\n", Z.Blocks(in)[1])
	fmt.Printf("%q\n", Z.Blocks(in)[2])
	//Output:
	// "Must have another block type first."
	// "Now we can start\na Verbatim\nblock.\n\nWhich can have blank lines, even."
	// "And back to a paragraph block."
}
// Now we can start
// a Verbatim
// block.
//
// Which can have blank lines, even.
/*
func ExampleBlocks() {
in := `
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
eiusmod tempor incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris
nisi ut aliquip ex ea commodo consequat.
Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat
cupidatat non proident, sunt in culpa qui officia deserunt mollit
anim id est laborum.
`
fmt.Println(Z.Blocks(in))
// Output:
// some thing
}
*/
/*
func ExampleFormat_remove_Initial_Blanks() {
fmt.Printf("%q\n", Z.Format("\n \n\n \n some"))
// Output:
// "some"
}
func ExampleFormat_wrapping() {
fmt.Println(Z.Format(`
Here is a bunch of stuff just to fill the line beyond 80 columns so that it will wrap when it is supposed to and right now
as well if there was a hard return in the middle of a line.
`))
// Output:
// Here is a bunch of stuff just to fill the line beyond 80 columns so that it will
// wrap when it is supposed to and right now
// as well if there was a hard return in the middle of a line.
}
*/

@ -0,0 +1,52 @@
package Z
import (
"fmt"
"strings"
"github.com/rwxrob/fn/filt"
)
// UsageGroup uses Bonzai usage notation, a basic form of regular
// expressions, to describe the arguments allowed where each argument is
// a literal string (avoid spaces). The arguments are joined with bars
// (|) and wrapped with parentheses producing a regex group. The min
// and max are then applied by adding the following regex decorations
// after the final parenthesis:
//
//	(nothing)  - min=1 max=1 (exactly one)
//	?          - min=0 max=0 (none or many)
//	+          - min=1 max=0 (one or more)
//	{min,}     - min>1 max=0 (min, no max)
//	{min,max}  - min>0 max>0 (min and max, other than min=1 max=1)
//	{,max}     - min=0 max>0 (max, no min)
//
// Any other combination of min and max (a negative value, for example)
// adds no decoration.
//
// An empty args slice returns an empty string. If only one arg, then
// that arg is simply returned and min and max are ignored. Arguments
// that are empty strings are ignored. No transformation is done to the
// string itself (such as removing white space).
//
// NOTE(review): in conventional regular expressions "?" means zero or
// one and "*" means zero or more -- confirm "?" is the intended Bonzai
// notation for min=0 max=0.
func UsageGroup(args []string, min, max int) string {
	args = filt.NotEmpty(args)

	switch len(args) {
	case 0:
		return ""
	case 1:
		return args[0]
	}

	var dec string
	switch {
	case min == 1 && max == 1:
		// exactly one: the bare group needs no decoration
	case min == 0 && max == 0:
		dec = "?"
	case min == 1 && max == 0:
		dec = "+"
	case min > 1 && max == 0:
		dec = fmt.Sprintf("{%v,}", min)
	case min > 0 && max > 0:
		dec = fmt.Sprintf("{%v,%v}", min, max)
	case min == 0 && max > 0:
		// includes max=1, which would otherwise be left undecorated and
		// read as "exactly one"
		dec = fmt.Sprintf("{,%v}", max)
	}

	return "(" + strings.Join(args, "|") + ")" + dec
}
Loading…
Cancel
Save