mirror of https://github.com/rwxrob/bonzai
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
418 lines
11 KiB
Go
418 lines
11 KiB
Go
package Z
|
|
|
|
import (
|
|
"log"
|
|
"regexp"
|
|
"unicode"
|
|
|
|
"github.com/rwxrob/scan"
|
|
"github.com/rwxrob/term"
|
|
"github.com/rwxrob/to"
|
|
)
|
|
|
|
// This file contains BonzaiMark.
|
|
|
|
// Layout defaults used by Wrap, Indent, and InWrap. The creator of
// a Bonzai command tree can change them here, in this one place, and
// the change applies to every composite command imported into their
// application.
var (
	// IndentBy is the number of spaces to indent in Indent. Default
	// is 7.
	IndentBy = 7

	// Columns is the number of bytes (not runes) at which Wrap will
	// wrap. Default is 80.
	Columns = 80
)
|
|
|
|
// Emph renders BonzaiMark emphasis spans specifically for
|
|
// VT100-compatible terminals (which almost all are today):
|
|
//
|
|
// *Italic*
|
|
// **Bold**
|
|
// ***BoldItalic***
|
|
// <under> (keeping brackets)
|
|
//
|
|
// See Mark for block formatting and rwxrob/term for terminal rendering.
|
|
func Emph(buf string) string {
|
|
var nbuf []rune
|
|
var opentok, closetok bool
|
|
var otok, ctok string
|
|
prev := ' '
|
|
|
|
for i := 0; i < len([]rune(buf)); i++ {
|
|
r := []rune(buf)[i]
|
|
|
|
if r == '<' {
|
|
nbuf = append(nbuf, '<')
|
|
nbuf = append(nbuf, []rune(term.Under)...)
|
|
for {
|
|
i++
|
|
r = rune(buf[i])
|
|
if r == '>' {
|
|
i++
|
|
break
|
|
}
|
|
nbuf = append(nbuf, r)
|
|
}
|
|
nbuf = append(nbuf, []rune(term.Reset)...)
|
|
nbuf = append(nbuf, '>')
|
|
i--
|
|
continue
|
|
}
|
|
|
|
if r != '*' {
|
|
|
|
if opentok {
|
|
tokval := " "
|
|
if !unicode.IsSpace(r) {
|
|
switch otok {
|
|
case "*":
|
|
tokval = term.Italic
|
|
case "**":
|
|
tokval = term.Bold
|
|
case "***":
|
|
tokval = term.BoldItalic
|
|
}
|
|
} else {
|
|
tokval = otok
|
|
}
|
|
nbuf = append(nbuf, []rune(tokval)...)
|
|
opentok = false
|
|
otok = ""
|
|
}
|
|
|
|
if closetok {
|
|
nbuf = append(nbuf, []rune(term.Reset)...) // practical, not perfect
|
|
ctok = ""
|
|
closetok = false
|
|
}
|
|
|
|
prev = r
|
|
nbuf = append(nbuf, r)
|
|
continue
|
|
}
|
|
|
|
// everything else for '*'
|
|
if unicode.IsSpace(prev) || opentok {
|
|
opentok = true
|
|
otok += string(r)
|
|
continue
|
|
}
|
|
|
|
// only closer conditions remain
|
|
if !unicode.IsSpace(prev) {
|
|
closetok = true
|
|
ctok += string(r)
|
|
continue
|
|
}
|
|
|
|
// nothing special
|
|
closetok = false
|
|
nbuf = append(nbuf, r)
|
|
}
|
|
|
|
// for tokens at the end of a block
|
|
if closetok {
|
|
nbuf = append(nbuf, []rune(term.Reset)...)
|
|
}
|
|
|
|
return string(nbuf)
|
|
}
|
|
|
|
// Wrap wraps to Columns width.
|
|
func Wrap(in string) string { w, _ := to.Wrapped(in, Columns); return w }
|
|
|
|
// Indent indents the number of spaces set by IndentBy.
|
|
func Indent(in string) string { return to.Indented(in, IndentBy) }
|
|
|
|
// InWrap combines both Wrap and Indent.
|
|
func InWrap(in string) string {
|
|
w, _ := to.Wrapped(in, Columns-IndentBy)
|
|
return to.Indented(w, IndentBy)
|
|
}
|
|
|
|
// ---------------------------- finish this ---------------------------
|
|
|
|
// TODO finish the full Mark implementation and documentation
|
|
|
|
/*
|
|
Mark renders BonzaiMark markup as formatted VT100-compatible terminal
|
|
output, wrapped and indented as specified by the package variables
|
|
IndentBy and Columns.
|
|
|
|
BonzaiMark
|
|
|
|
BonzaiMark is a minimal subset of CommonMark (Markdown) suitable for
|
|
rendering to terminals, web pages, PDF, or plain text. The syntax is
|
|
deliberately simple and easy to read (much like standard GoDoc) but rich
|
|
enough to look well rendered in documents as well as the terminal.
|
|
|
|
Stripped Indentation
|
|
|
|
Unlike CommonMark, BonzaiMark ignores any number of blank lines or
|
|
whitespace before the first line and uses the initial characters
|
|
preceding the first line (tabs or spaces) as a basis for what to strip
|
|
from every subsequent line of the document. All trailing white space and
|
|
blank lines are also discarded. This allows BonzaiMark to be included in
|
|
source code in very readable ways (preferably with backtick string
|
|
literals).
|
|
|
|
some := `
|
|
Here is a paragraph
|
|
that will have the initial spaces
|
|
stripped.
|
|
`
|
|
|
|
Structure: Blocks of Spans
|
|
|
|
Like CommonMark, every BonzaiMark document consists of one or more
|
|
blocks which contain one or more spans of the following type:
|
|
|
|
plain
|
|
*italic*
|
|
**bold**
|
|
***bolditalic***
|
|
<under> (brackets remain)
|
|
|
|
Unlike CommonMark, spans may not contain any other span type.
|
|
|
|
While the underline format is not supported in CommonMark, underlining is frequently used in place of italic for most terminals. Angle brackets are, however, supported in CommonMark.
|
|
|
|
The following limited block types are supported. (All others from CommonMark are not):
|
|
|
|
* Paragraph
|
|
* Verbatim
|
|
* Numbered List
|
|
* Bulleted List
|
|
* Numbered Long List Item
|
|
* Bulleted Long List Item
|
|
|
|
Unlike CommonMark, all blocks must be separated by two or more line returns.
|
|
|
|
Paragraph Blocks
|
|
|
|
Paragraph blocks are the most common. They consist of one or more spans.
|
|
|
|
Verbatim Blocks
|
|
|
|
A Verbatim block will be included exactly as typed. It begins with the
|
|
first line that has four or more spaces followed by a non-whitespace
|
|
character. The block continues until the next block is detected. (All
|
|
other blocks must begin on the first column (after stripped
|
|
indentation).)
|
|
|
|
Lists
|
|
|
|
Like CommonMark there are long lists and compact lists. Long lists are
|
|
actually multiple consecutive long list item blocks whereas compact
|
|
lists consist of list items that are on one line after another (no
|
|
double-return block separator).
|
|
|
|
Lists are either numbered or bulleted and never exceed one level (no
|
|
nesting). Both list types may contain any number of paragraph blocks but
|
|
most will only contain the one. When multiple paragraph blocks are
|
|
wanted the subsequent paragraph block must begin on the same column as
|
|
the first character of the first line of the first list item paragraph;
|
|
they must line up.
|
|
|
|
1. **Keep list item paragraphs lined up**
|
|
|
|
This is a second paragraph block under the same list item because
|
|
it lines up with the first line of the first paragraph
|
|
|
|
blank lines -- over multiple consecutive lines but each line after the
|
|
first must line up exactly with the first character of the first list
|
|
item line to be considered still a part of the list item.
|
|
|
|
|
|
Numbered Lists
|
|
|
|
Numbered lists always begin with a go integer and a dot (.).
|
|
Conventionally a 1. is used for everything so that document maintainers
|
|
can quickly reorganize when needed without a tool for renumbering. Nested
|
|
lists are not supported. Each item in the list must be on immediate
|
|
subsequent lines. Numbered lists must always be rendered with Arabic
|
|
numerals.
|
|
|
|
Bulleted Lists
|
|
|
|
Bulleted lists must begin with a single asterisk (*) followed by
|
|
a single space. No other bullet type from CommonMark is supported.
|
|
Nested lists are not supported. Each item in a list must be on an
|
|
immediately subsequent line. List items may contain any number of spans
|
|
over multiple consecutive lines but each line after the first must line
|
|
up exactly with the first character of the first list item line.
|
|
|
|
* This is a
|
|
list item
|
|
|
|
Only Inline Links
|
|
|
|
Only explicit link URLs are supported. They must always be wrapped with
|
|
angle brackets (<>). Technically inline links are a span of type "under"
|
|
which also gives them an underline emphasis on the terminal.
|
|
|
|
No Escapes
|
|
|
|
There is no support for escaping anything in BonzaiMark. (CommonMark allows the placement of a backslash to remove any special meaning.) Therefore, most authors will use verbatim blocks when it is necessary to use the reserved BonzaiMark tokens in other ways.
|
|
|
|
|
|
|
|
Soft and Hard Line Endings
|
|
|
|
Like CommonMark lines that follow other lines immediately are
|
|
effectively joined together unless there are two or more spaces at the
|
|
end of the line (a hard return). This is after any indentation has been
|
|
removed (see Stripped Indentation).
|
|
|
|
|
|
|
|
of spaces for the first line of indentation.
|
|
Any line beginning with at least four spaces (after trimming
|
|
indentation) will be kept verbatim.
|
|
|
|
Emphasis will be applied as possible if the following markup is
|
|
detected:
|
|
|
|
Note that the format of the emphasis might not always be as
|
|
specifically named. For example, most terminals do not support italic
|
|
fonts and so will instead underline *italic* text, so (as specified
|
|
in HTML5 for <i>, for example) these format names should be taken to
|
|
mean their semantic equivalents.
|
|
|
|
For terminal rendering details see the rwxrob/term package.
|
|
*/
|
|
|
|
// Mark
|
|
|
|
func Mark(in string) string {
|
|
if in == "" {
|
|
return ""
|
|
}
|
|
|
|
//var out string
|
|
blocks := Blocks(in)
|
|
log.Print(blocks)
|
|
|
|
//out := to.Dedented(markup)
|
|
//out, _ = to.Wrapped(out, 80)
|
|
//out = Emph(out)
|
|
//return out
|
|
return ""
|
|
}
|
|
|
|
// Blocks strips preceding and trailing white space and then checks the
|
|
// first line for indentation (spaces or tabs) and strips that exact
|
|
// indentation string from every line. It then breaks up the input into
|
|
// blocks separated by one or more empty lines and applies basic
|
|
// formatting to each as follows:
|
|
//
|
|
// If is one of the following leave alone with no wrapping:
|
|
//
|
|
// * Bulleted List - beginning with *
|
|
// * Numbered List - beginning with 1.
|
|
// * Verbatim - beginning with four spaces
|
|
//
|
|
// Everything else is considered a "paragraph" and will be unwrapped
|
|
// into a single long line (which is normally wrapped later).
|
|
//
|
|
// For now, these blocks are added as is, but plans are to eventually
|
|
// add support for short and long lists much like CommonMark.
|
|
//
|
|
// Note that because of the nature of Verbatim's block's initial (4
|
|
// space) token Verbatim blocks must never be first since the entire
|
|
// input buffer is first dedented and the spaces would grouped with the
|
|
// indentation to be stripped. This is never a problem, however,
|
|
// because Verbatim blocks never make sense as the first block in
|
|
// a BonzaiMark document. This simplicity and clarity of 4-space tokens
|
|
// far outweighs the advantages of alternatives (such as fences).
|
|
func Blocks(in string) []string {
|
|
|
|
var blocks []string
|
|
verbpre := regexp.MustCompile(` {4,}`)
|
|
s := scan.R{Buf: []byte(to.Dedented(in))}
|
|
|
|
MAIN:
|
|
for s.Scan() {
|
|
|
|
switch s.Rune {
|
|
|
|
case '*': // bulleted list
|
|
if s.Is(" ") {
|
|
m := s.Pos - 1
|
|
for s.Scan() {
|
|
if s.Is("\n\n") {
|
|
blocks = append(blocks, string(s.Buf[m:s.Pos]))
|
|
s.Pos += 2
|
|
continue MAIN
|
|
}
|
|
}
|
|
}
|
|
|
|
case '1': // numbered list
|
|
if s.Is(". ") {
|
|
m := s.Pos - 1
|
|
for s.Scan() {
|
|
if s.Is("\n\n") {
|
|
blocks = append(blocks, string(s.Buf[m:s.Pos]))
|
|
s.Pos += 2
|
|
continue MAIN
|
|
}
|
|
}
|
|
}
|
|
|
|
case ' ': // verbatim
|
|
s.Pos -= 1
|
|
ln := s.Match(verbpre)
|
|
s.Pos++
|
|
|
|
if ln < 0 {
|
|
continue
|
|
}
|
|
pre := s.Buf[s.Pos-1 : s.Pos+ln-1]
|
|
s.Pos += len(pre) - 1
|
|
|
|
block := []rune{}
|
|
for s.Scan() {
|
|
|
|
if s.Rune == '\n' {
|
|
|
|
// add in indented lines
|
|
if s.Is(string(pre)) {
|
|
block = append(block, '\n')
|
|
s.Pos += len(pre)
|
|
continue
|
|
}
|
|
|
|
// end of the block
|
|
blocks = append(blocks, string(block))
|
|
continue MAIN
|
|
}
|
|
|
|
block = append(block, s.Rune)
|
|
}
|
|
|
|
case '\n', '\r', '\t': // inconsequential white space
|
|
continue
|
|
|
|
default: // paragraph
|
|
block := []rune{s.Rune}
|
|
for s.Scan() {
|
|
switch s.Rune {
|
|
case '\n', '\r':
|
|
block = append(block, ' ')
|
|
default:
|
|
block = append(block, s.Rune)
|
|
}
|
|
if s.Is("\n\n") {
|
|
blocks = append(blocks, string(block))
|
|
s.Scan()
|
|
s.Scan()
|
|
continue MAIN
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
return blocks
|
|
}
|