Create Block types for formatting

This commit is contained in:
rwxrob 2022-04-05 12:26:24 -04:00
parent 98ac4f744a
commit fa625e5337
No known key found for this signature in database
GPG Key ID: 2B9111F33082AE77
2 changed files with 178 additions and 313 deletions

425
z/mark.go
View File

@ -1,7 +1,6 @@
package Z
import (
"log"
"regexp"
"unicode"
@ -10,8 +9,6 @@ import (
"github.com/rwxrob/to"
)
// This file contains BonzaiMark.
// IndentBy is the number of spaces to indent in Indent. Default is 7.
// Bonzai command tree creators can change this for every composite
// command imported into their application in this one place.
@ -22,6 +19,142 @@ var IndentBy = 7
// composite command imported into their application in this one place.
var Columns = 80
// Lines splits the input string into its individual lines and returns
// them as a slice. It is a thin convenience wrapper that delegates
// entirely to to.Lines.
func Lines(in string) []string {
	lines := to.Lines(in)
	return lines
}
// Kinds of block that Blocks stores in Block.T. Numbering starts at 1,
// so the zero value of an int never names a valid kind.
const (
	Paragraph = 1 + iota // text unwrapped into a single long line
	Numbered             // list beginning with "1. "
	Bulleted             // list beginning with "* "
	Verbatim             // lines beginning with four or more spaces
)
// Block is one parsed unit of BonzaiMark input: a kind paired with the
// bytes that make up its content.
type Block struct {
	T int    // kind of block (Paragraph, Numbered, Bulleted, or Verbatim)
	V []byte // content of the block
}

// String returns the content of the block as a string, which makes
// *Block satisfy the fmt.Stringer interface.
func (b *Block) String() string {
	content := string(b.V)
	return content
}
// verbatimPrefix matches the run of four or more spaces that opens
// a Verbatim block. It is compiled once here rather than on every call
// to Blocks.
var verbatimPrefix = regexp.MustCompile(` {4,}`)

// Blocks dedents the input with to.Dedented (so the indentation of the
// first line is removed from every line) and then breaks it up into
// blocks separated by one or more empty lines. Each block is returned
// with its type and content as follows:
//
// A block of one of the following types is kept as written, with no
// unwrapping:
//
//     * Bulleted List - beginning with "* "
//     * Numbered List - beginning with "1. "
//     * Verbatim      - beginning with four or more spaces
//
// Everything else is considered a "paragraph" and will be unwrapped
// into a single long line (which is normally wrapped later). This
// includes text that merely starts with '*' or '1' without the list
// marker that follows it (such as "*italic* text").
//
// A block that runs to the end of the input without a trailing blank
// line is still returned; trailing white space is removed from it.
//
// For now, list blocks are added as is, but plans are to eventually
// add support for short and long lists much like CommonMark.
//
// Note that because of the nature of the Verbatim block's initial
// (4 space) token, Verbatim blocks must never be first since the
// entire input buffer is first dedented and the spaces would be grouped
// with the indentation to be stripped. This is never a problem,
// however, because Verbatim blocks never make sense as the first block
// in a BonzaiMark document. The simplicity and clarity of 4-space
// tokens far outweighs the advantages of alternatives (such as fences).
func Blocks(in string) []*Block {
	var blocks []*Block
	s := scan.R{Buf: []byte(to.Dedented(in))}

	// trimmed drops trailing white space from a block that was ended by
	// the end of the input rather than by a blank line.
	trimmed := func(b []byte) []byte {
		n := len(b)
		for n > 0 {
			c := b[n-1]
			if c != ' ' && c != '\n' && c != '\r' && c != '\t' {
				break
			}
			n--
		}
		return b[:n]
	}

	// list collects a list block of the given kind. It must be called
	// right after the single-byte list marker ('*' or '1') was scanned.
	list := func(kind int) {
		beg := s.Pos - 1 // offset of the marker just scanned
		for s.Scan() {
			if s.Is("\n\n") {
				blocks = append(blocks, &Block{kind, s.Buf[beg:s.Pos]})
				s.Pos += 2 // skip the blank line
				return
			}
		}
		// input ended before a blank line: the remainder is the block
		blocks = append(blocks, &Block{kind, trimmed(s.Buf[beg:])})
	}

MAIN:
	for s.Scan() {
		switch {

		case s.Rune == '*' && s.Is(" "): // bulleted list
			list(Bulleted)

		case s.Rune == '1' && s.Is(". "): // numbered list
			list(Numbered)

		case s.Rune == ' ': // verbatim
			// back up so the match includes the space just scanned
			s.Pos--
			ln := s.Match(verbatimPrefix)
			s.Pos++
			if ln < 0 {
				continue
			}
			pre := s.Buf[s.Pos-1 : s.Pos+ln-1]
			s.Pos += len(pre) - 1
			var block []byte
			for s.Scan() {
				if s.Rune == '\n' {
					// add in indented lines
					if s.Is(string(pre)) {
						block = append(block, '\n')
						s.Pos += len(pre)
						continue
					}
					// end of the block
					blocks = append(blocks, &Block{Verbatim, block})
					continue MAIN
				}
				block = append(block, string(s.Rune)...)
			}
			// input ended inside the block; skip a bare trailing prefix
			if len(block) > 0 {
				blocks = append(blocks, &Block{Verbatim, block})
			}

		case s.Rune == '\n' || s.Rune == '\r' || s.Rune == '\t':
			// inconsequential white space
			continue

		default: // paragraph
			block := []byte(string(s.Rune))
			for s.Scan() {
				switch s.Rune {
				case '\n', '\r':
					// unwrap: line endings become spaces
					block = append(block, ' ')
				default:
					block = append(block, string(s.Rune)...)
				}
				if s.Is("\n\n") {
					blocks = append(blocks, &Block{Paragraph, block})
					s.Scan()
					s.Scan()
					continue MAIN
				}
			}
			// input ended before a blank line: keep the paragraph
			blocks = append(blocks, &Block{Paragraph, trimmed(block)})
		}
	}

	return blocks
}
// Emph renders BonzaiMark emphasis spans specifically for
// VT100-compatible terminals (which almost all are today):
//
@ -129,168 +262,26 @@ func InWrap(in string) string {
return to.Indented(w, IndentBy)
}
// ---------------------------- finish this ---------------------------
// TODO finish the full Mark implementation and documentation
/*
Mark renders BonzaiMark markup as formatted VT100-compatible terminal
output, wrapped and indented as specified by the package variables
IndentBy and Columns.
BonzaiMark
BonzaiMark is a minimal subset of CommonMark (Markdown) suitable for
rendering to terminals, web pages, PDF, or plain text. The syntax is
deliberately simple and easy to read (much like standard GoDoc) but rich
enough to look well rendered in documents as well as the terminal.
Stripped Indentation
Unlike CommonMark, BonzaiMark ignores any number of blank lines or
whitespace before the first line and uses the initial characters
preceding the first line (tabs or spaces) as a basis for what to strip
from every subsequent line of the document. All trailing white space and
blank lines are also discarded. This allows BonzaiMark to be included in
source code in very readable ways (preferably with backtick string
literals).
some := `
Here is a paragraph
that will have the initial spaces
stripped.
`
Structure: Blocks of Spans
Like CommonMark, every BonzaiMark document consists of one or more
blocks which contain one or more spans of the following type:
plain
*italic*
**bold**
***bolditalic***
<under> (brackets remain)
Unlike CommonMark, spans may not contain any other span type.
While the underline format is not supported in CommonMark, underlining is frequently used in place of italic for most terminals. Angle brackets are, however, supported in CommonMark.
The following limited block types are supported. (All others from CommonMark are not):
* Paragraph
* Verbatim
* Numbered List
* Bulleted List
* Numbered Long List Item
* Bulleted Long List Item
Unlike CommonMark, all blocks must be separated by two or more line returns.
Paragraph Blocks
Paragraph blocks are the most common. They consist of one or more spans.
Verbatim Blocks
A Verbatim block will be included exactly as typed. It begins with the
first line that has four or more spaces followed by a non-whitespace
character. The block continues until the next block is detected. (All
other blocks must begin on the first column, after stripped
indentation.)
Lists
Like CommonMark there are long lists and compact lists. Long lists are
actually multiple consecutive long list item blocks whereas compact
lists consist of list items that are on one line after another (no
double-return block separator).
Lists are either numbered or bulleted and never exceed one level (no
nesting). Both list types may contain any number of paragraph blocks but
most will only contain the one. When multiple paragraph blocks are
wanted the subsequent paragraph block must begin on the same column as
the first character of the first line of the first list item paragraph;
they must line up.
1. **Keep list item paragraphs lined up**
This is a second paragraph block under the same list item because
it lines up with the first line of the first paragraph
blank lines -- over multiple consecutive lines but each line after the
first must line up exactly with the first character of the first list
item line to be considered still a part of the list item.
Numbered Lists
Numbered lists always begin with a go integer and a dot (.).
Conventionally a 1. is used for everything so that document maintainers
can quickly reorganize when needed without a tool for renumbering. Nested
lists are not supported. Each item in the list must be on immediate
subsequent lines. Numbered lists must always be rendered with Arabic
numerals.
Bulleted Lists
Bulleted lists must begin with a single asterisk (*) followed by
a single space. No other bullet type from CommonMark is supported.
Nested lists are not supported. Each item in a list must be on an
immediately subsequent line. List items may contain any number of spans
over multiple consecutive lines but each line after the first must line
up exactly with the first character of the first list item line.
* This is a
list item
Only Inline Links
Only explicit link URLs are supported. They must always be wrapped with
angle brackets (<>). Technically inline links are a span of type "under"
which also gives them an underline emphasis on the terminal.
No Escapes
There is no support for escaping anything in BonzaiMark. (CommonMark allows the placement of a backslash to remove any special meaning.) Therefore, most authors will use verbatim blocks when it is necessary to use the reserved BonzaiMark tokens in other ways.
Soft and Hard Line Endings
Like CommonMark lines that follow other lines immediately are
effectively joined together unless there are two or more spaces at the
end of the line (a hard return). This is after any indentation has been
removed (see Stripped Indentation).
of spaces for the first line of indentation.
Any line beginning with at least four spaces (after trimming
indentation) will be kept verbatim.
Emphasis will be applied as possible if the following markup is
detected:
Note that the format of the emphasis might not always be as
specifically named. For example, most terminals do not support italic
fonts and so will instead underline *italic* text, so (as specified
in HTML5 for <i>, for example) these format names should be taken to
mean their semantic equivalents.
For terminal rendering details see the rwxrob/term package.
*/
// Mark
// Mark parses the input as a string of BonzaiMark, multiple blocks with
// optional emphasis (see Blocks and Emph).
func Mark(in string) string {
if in == "" {
return ""
}
//var out string
blocks := Blocks(in)
log.Print(blocks)
if len(blocks) == 0 {
return ""
}
//var out string
indent := to.Indentation(blocks[0])
for _, block := range blocks {
}
//out := to.Dedented(markup)
//out, _ = to.Wrapped(out, 80)
@ -298,120 +289,4 @@ func Mark(in string) string {
//return out
return ""
}
// Blocks strips preceding and trailing white space and then checks the
// first line for indentation (spaces or tabs) and strips that exact
// indentation string from every line. It then breaks up the input into
// blocks separated by one or more empty lines and applies basic
// formatting to each as follows:
//
// A block of one of the following types is left alone with no wrapping:
//
// * Bulleted List - beginning with *
// * Numbered List - beginning with 1.
// * Verbatim - beginning with four spaces
//
// Everything else is considered a "paragraph" and will be unwrapped
// into a single long line (which is normally wrapped later).
//
// For now, these blocks are added as is, but plans are to eventually
// add support for short and long lists much like CommonMark.
//
// Note that because of the nature of the Verbatim block's initial (4
// space) token Verbatim blocks must never be first since the entire
// input buffer is first dedented and the spaces would be grouped with
// the indentation to be stripped. This is never a problem, however,
// because Verbatim blocks never make sense as the first block in
// a BonzaiMark document. This simplicity and clarity of 4-space tokens
// far outweighs the advantages of alternatives (such as fences).
//
// Each block is returned as a string, in input order.
//
// NOTE(review): every append below happens only once a blank line
// ("\n\n") or, for Verbatim, a following non-indented line is seen, so
// a block that runs to the very end of the input is not returned.
func Blocks(in string) []string {
var blocks []string
// matches the leading run of spaces that opens a verbatim block
verbpre := regexp.MustCompile(` {4,}`)
s := scan.R{Buf: []byte(to.Dedented(in))}
MAIN:
for s.Scan() {
switch s.Rune {
case '*': // bulleted list
// NOTE(review): when the '*' is not followed by a space nothing is
// done here and the '*' itself is not added to any block.
if s.Is(" ") {
// presumably the offset of the '*' just scanned (assumes Scan leaves
// Pos just past the scanned rune) -- TODO confirm
m := s.Pos - 1
for s.Scan() {
if s.Is("\n\n") {
blocks = append(blocks, string(s.Buf[m:s.Pos]))
// skip over the blank line
s.Pos += 2
continue MAIN
}
}
}
case '1': // numbered list
// NOTE(review): as with '*', a '1' not followed by ". " is dropped.
if s.Is(". ") {
m := s.Pos - 1
for s.Scan() {
if s.Is("\n\n") {
blocks = append(blocks, string(s.Buf[m:s.Pos]))
// skip over the blank line
s.Pos += 2
continue MAIN
}
}
}
case ' ': // verbatim
// step back so the match starts on the space just scanned, then
// restore the position
s.Pos -= 1
ln := s.Match(verbpre)
s.Pos++
// a negative length is treated as "no verbatim prefix here"
if ln < 0 {
continue
}
// pre is the exact run of spaces that opened the block; every
// following line of the block must start with it
pre := s.Buf[s.Pos-1 : s.Pos+ln-1]
s.Pos += len(pre) - 1
block := []rune{}
for s.Scan() {
if s.Rune == '\n' {
// add in indented lines
if s.Is(string(pre)) {
block = append(block, '\n')
s.Pos += len(pre)
continue
}
// end of the block
blocks = append(blocks, string(block))
continue MAIN
}
block = append(block, s.Rune)
}
case '\n', '\r', '\t': // inconsequential white space
continue
default: // paragraph
block := []rune{s.Rune}
for s.Scan() {
switch s.Rune {
case '\n', '\r':
// unwrap: line endings inside a paragraph become spaces
block = append(block, ' ')
default:
block = append(block, s.Rune)
}
if s.Is("\n\n") {
blocks = append(blocks, string(block))
// consume the two line endings of the blank line
s.Scan()
s.Scan()
continue MAIN
}
}
}
}
return blocks
}
*/

View File

@ -7,6 +7,12 @@ import (
"github.com/rwxrob/term"
)
// ExampleLines shows a two-line string being split into a slice with
// one element per line.
func ExampleLines() {
	lines := Z.Lines("line one\nline two")
	fmt.Printf("%q\n", lines)
	// Output:
	// ["line one" "line two"]
}
func ExampleEmph_basics() {
// Emph observes the rwxrob/term escapes
@ -146,51 +152,35 @@ func ExampleBlocks_verbatim() {
}
// Now we can start
// a Verbatim
// block.
//
// Which can have blank lines, even.
/*
func ExampleBlocks() {
func ExampleMark() {
in := `
Must have *another* block before verbatim:
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
eiusmod tempor incididunt ut labore et dolore magna aliqua.
Now we can start
a Verbatim
block.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris
nisi ut aliquip ex ea commodo consequat.
Which can have blank lines, even.
Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat
cupidatat non proident, sunt in culpa qui officia deserunt mollit
anim id est laborum.
And back to a paragraph block.
`
* foo
* bar
fmt.Println(Z.Blocks(in))
And a numbered list
1. Something
2. here
That's really it.
`
fmt.Println(Z.Mark(in))
//Output:
// Output:
// some thing
}
*/
/*
func ExampleFormat_remove_Initial_Blanks() {
fmt.Printf("%q\n", Z.Format("\n \n\n \n some"))
// Output:
// "some"
}
func ExampleFormat_wrapping() {
fmt.Println(Z.Format(`
Here is a bunch of stuff just to fill the line beyond 80 columns so that it will wrap when it is supposed to and right now
as well if there was a hard return in the middle of a line.
`))
// Output:
// Here is a bunch of stuff just to fill the line beyond 80 columns so that it will
// wrap when it is supposed to and right now
// as well if there was a hard return in the middle of a line.
}
*/