mirror of https://github.com/edouardparis/lntop
views: message.NewPrinter(language.English)
parent
48011bb108
commit
7efa68f170
@ -0,0 +1,70 @@
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package plural
|
||||
|
||||
// Form defines a plural form.
|
||||
//
|
||||
// Not all languages support all forms. Also, the meaning of each form varies
|
||||
// per language. It is important to note that the name of a form does not
|
||||
// necessarily correspond one-to-one with the set of numbers. For instance,
|
||||
// for Croatian, One matches not only 1, but also 11, 21, etc.
|
||||
//
|
||||
// Each language must at least support the form "other".
|
||||
type Form byte
|
||||
|
||||
const (
|
||||
Other Form = iota
|
||||
Zero
|
||||
One
|
||||
Two
|
||||
Few
|
||||
Many
|
||||
)
|
||||
|
||||
var countMap = map[string]Form{
|
||||
"other": Other,
|
||||
"zero": Zero,
|
||||
"one": One,
|
||||
"two": Two,
|
||||
"few": Few,
|
||||
"many": Many,
|
||||
}
|
||||
|
||||
type pluralCheck struct {
|
||||
// category:
|
||||
// 3..7: opID
|
||||
// 0..2: category
|
||||
cat byte
|
||||
setID byte
|
||||
}
|
||||
|
||||
// opID identifies the type of operand in the plural rule, being i, n or f.
|
||||
// (v, w, and t are treated as filters in our implementation.)
|
||||
type opID byte
|
||||
|
||||
const (
|
||||
opMod opID = 0x1 // is '%' used?
|
||||
opNotEqual opID = 0x2 // using "!=" to compare
|
||||
opI opID = 0 << 2 // integers after taking the absolute value
|
||||
opN opID = 1 << 2 // full number (must be integer)
|
||||
opF opID = 2 << 2 // fraction
|
||||
opV opID = 3 << 2 // number of visible digits
|
||||
opW opID = 4 << 2 // number of visible digits without trailing zeros
|
||||
opBretonM opID = 5 << 2 // hard-wired rule for Breton
|
||||
opItalian800 opID = 6 << 2 // hard-wired rule for Italian
|
||||
opAzerbaijan00s opID = 7 << 2 // hard-wired rule for Azerbaijan
|
||||
)
|
||||
const (
|
||||
// Use this plural form to indicate the next rule needs to match as well.
|
||||
// The last condition in the list will have the correct plural form.
|
||||
andNext = 0x7
|
||||
formMask = 0x7
|
||||
|
||||
opShift = 3
|
||||
|
||||
// numN indicates the maximum integer, or maximum mod value, for which we
|
||||
// have inclusion masks.
|
||||
numN = 100
|
||||
// The common denominator of the modulo that is taken.
|
||||
maxMod = 100
|
||||
)
|
@ -0,0 +1,525 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This file generates data for the CLDR plural rules, as defined in
|
||||
// https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
|
||||
//
|
||||
// We assume a slightly simplified grammar:
|
||||
//
|
||||
// condition = and_condition ('or' and_condition)* samples
|
||||
// and_condition = relation ('and' relation)*
|
||||
// relation = expr ('=' | '!=') range_list
|
||||
// expr = operand ('%' '10' '0'* )?
|
||||
// operand = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
|
||||
// range_list = (range | value) (',' range_list)*
|
||||
// range = value'..'value
|
||||
// value = digit+
|
||||
// digit = 0|1|2|3|4|5|6|7|8|9
|
||||
//
|
||||
// samples = ('@integer' sampleList)?
|
||||
// ('@decimal' sampleList)?
|
||||
// sampleList = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
|
||||
// sampleRange = decimalValue ('~' decimalValue)?
|
||||
// decimalValue = value ('.' value)?
|
||||
//
|
||||
// Symbol Value
|
||||
// n absolute value of the source number (integer and decimals).
|
||||
// i integer digits of n.
|
||||
// v number of visible fraction digits in n, with trailing zeros.
|
||||
// w number of visible fraction digits in n, without trailing zeros.
|
||||
// f visible fractional digits in n, with trailing zeros.
|
||||
// t visible fractional digits in n, without trailing zeros.
|
||||
//
|
||||
// The algorithm for which the data is generated is based on the following
|
||||
// observations
|
||||
//
|
||||
// - the number of different sets of numbers which the plural rules use to
|
||||
// test inclusion is limited,
|
||||
// - most numbers that are tested on are < 100
|
||||
//
|
||||
// This allows us to define a bitmap for each number < 100 where a bit i
|
||||
// indicates whether this number is included in some defined set i.
|
||||
// The function matchPlural in plural.go defines how we can subsequently use
|
||||
// this data to determine inclusion.
|
||||
//
|
||||
// There are a few languages for which this doesn't work. For one Italian and
|
||||
// Azerbaijan, which both test against numbers > 100 for ordinals and Breton,
|
||||
// which considers whether numbers are multiples of hundreds. The model here
|
||||
// could be extended to handle Italian and Azerbaijan fairly easily (by
|
||||
// considering the numbers 100, 200, 300, ..., 800, 900 in addition to the first
|
||||
// 100), but for now it seems easier to just hard-code these cases.
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/language"
|
||||
"golang.org/x/text/internal/language/compact"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
var (
|
||||
test = flag.Bool("test", false,
|
||||
"test existing tables; can be used to compare web data with package data.")
|
||||
outputFile = flag.String("output", "tables.go", "output file")
|
||||
outputTestFile = flag.String("testoutput", "data_test.go", "output file")
|
||||
|
||||
draft = flag.String("draft",
|
||||
"contributed",
|
||||
`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
|
||||
)
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
const pkg = "plural"
|
||||
|
||||
gen.Repackage("gen_common.go", "common.go", pkg)
|
||||
// Read the CLDR zip file.
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
|
||||
d := &cldr.Decoder{}
|
||||
d.SetDirFilter("supplemental", "main")
|
||||
d.SetSectionFilter("numbers", "plurals")
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatalf("DecodeZip: %v", err)
|
||||
}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile(*outputFile, pkg)
|
||||
|
||||
gen.WriteCLDRVersion(w)
|
||||
|
||||
genPlurals(w, data)
|
||||
|
||||
w = gen.NewCodeWriter()
|
||||
defer w.WriteGoFile(*outputTestFile, pkg)
|
||||
|
||||
genPluralsTests(w, data)
|
||||
}
|
||||
|
||||
// pluralTest holds the CLDR sample values of a single plural rule. The
// generator emits the type itself and one slice of these per plural type.
type pluralTest struct {
	// locales is a space-separated list of locales for this test.
	locales string
	// form is the plural form the samples map to. It is an int instead of a
	// Form to simplify generation.
	form int
	// integer holds the "@integer" samples: entries of the form \d+ or \d+~\d+.
	integer []string
	// decimal holds the "@decimal" samples: entries of the form f or f~f,
	// where f is \d+\.\d+.
	decimal []string
}
|
||||
|
||||
func genPluralsTests(w *gen.CodeWriter, data *cldr.CLDR) {
|
||||
w.WriteType(pluralTest{})
|
||||
|
||||
for _, plurals := range data.Supplemental().Plurals {
|
||||
if plurals.Type == "" {
|
||||
// The empty type is reserved for plural ranges.
|
||||
continue
|
||||
}
|
||||
tests := []pluralTest{}
|
||||
|
||||
for _, pRules := range plurals.PluralRules {
|
||||
for _, rule := range pRules.PluralRule {
|
||||
test := pluralTest{
|
||||
locales: pRules.Locales,
|
||||
form: int(countMap[rule.Count]),
|
||||
}
|
||||
scan := bufio.NewScanner(strings.NewReader(rule.Data()))
|
||||
scan.Split(splitTokens)
|
||||
var p *[]string
|
||||
for scan.Scan() {
|
||||
switch t := scan.Text(); t {
|
||||
case "@integer":
|
||||
p = &test.integer
|
||||
case "@decimal":
|
||||
p = &test.decimal
|
||||
case ",", "…":
|
||||
default:
|
||||
if p != nil {
|
||||
*p = append(*p, t)
|
||||
}
|
||||
}
|
||||
}
|
||||
tests = append(tests, test)
|
||||
}
|
||||
}
|
||||
w.WriteVar(plurals.Type+"Tests", tests)
|
||||
}
|
||||
}
|
||||
|
||||
func genPlurals(w *gen.CodeWriter, data *cldr.CLDR) {
|
||||
for _, plurals := range data.Supplemental().Plurals {
|
||||
if plurals.Type == "" {
|
||||
continue
|
||||
}
|
||||
// Initialize setMap and inclusionMasks. They are already populated with
|
||||
// a few entries to serve as an example and to assign nice numbers to
|
||||
// common cases.
|
||||
|
||||
// setMap contains sets of numbers represented by boolean arrays where
|
||||
// a true value for element i means that the number i is included.
|
||||
setMap := map[[numN]bool]int{
|
||||
// The above init func adds an entry for including all numbers.
|
||||
[numN]bool{1: true}: 1, // fix {1} to a nice value
|
||||
[numN]bool{2: true}: 2, // fix {2} to a nice value
|
||||
[numN]bool{0: true}: 3, // fix {0} to a nice value
|
||||
}
|
||||
|
||||
// inclusionMasks contains bit masks for every number under numN to
|
||||
// indicate in which set the number is included. Bit 1 << x will be set
|
||||
// if it is included in set x.
|
||||
inclusionMasks := [numN]uint64{
|
||||
// Note: these entries are not complete: more bits will be set along the way.
|
||||
0: 1 << 3,
|
||||
1: 1 << 1,
|
||||
2: 1 << 2,
|
||||
}
|
||||
|
||||
// Create set {0..99}. We will assign this set the identifier 0.
|
||||
var all [numN]bool
|
||||
for i := range all {
|
||||
// Mark number i as being included in the set (which has identifier 0).
|
||||
inclusionMasks[i] |= 1 << 0
|
||||
// Mark number i as included in the set.
|
||||
all[i] = true
|
||||
}
|
||||
// Register the identifier for the set.
|
||||
setMap[all] = 0
|
||||
|
||||
rules := []pluralCheck{}
|
||||
index := []byte{0}
|
||||
langMap := map[compact.ID]byte{0: 0}
|
||||
|
||||
for _, pRules := range plurals.PluralRules {
|
||||
// Parse the rules.
|
||||
var conds []orCondition
|
||||
for _, rule := range pRules.PluralRule {
|
||||
form := countMap[rule.Count]
|
||||
conds = parsePluralCondition(conds, rule.Data(), form)
|
||||
}
|
||||
// Encode the rules.
|
||||
for _, c := range conds {
|
||||
// If an or condition only has filters, we create an entry for
|
||||
// this filter and the set that contains all values.
|
||||
empty := true
|
||||
for _, b := range c.used {
|
||||
empty = empty && !b
|
||||
}
|
||||
if empty {
|
||||
rules = append(rules, pluralCheck{
|
||||
cat: byte(opMod<<opShift) | byte(c.form),
|
||||
setID: 0, // all values
|
||||
})
|
||||
continue
|
||||
}
|
||||
// We have some entries with values.
|
||||
for i, set := range c.set {
|
||||
if !c.used[i] {
|
||||
continue
|
||||
}
|
||||
index, ok := setMap[set]
|
||||
if !ok {
|
||||
index = len(setMap)
|
||||
setMap[set] = index
|
||||
for i := range inclusionMasks {
|
||||
if set[i] {
|
||||
inclusionMasks[i] |= 1 << uint64(index)
|
||||
}
|
||||
}
|
||||
}
|
||||
rules = append(rules, pluralCheck{
|
||||
cat: byte(i<<opShift | andNext),
|
||||
setID: byte(index),
|
||||
})
|
||||
}
|
||||
// Now set the last entry to the plural form the rule matches.
|
||||
rules[len(rules)-1].cat &^= formMask
|
||||
rules[len(rules)-1].cat |= byte(c.form)
|
||||
}
|
||||
// Point the relevant locales to the created entries.
|
||||
for _, loc := range strings.Split(pRules.Locales, " ") {
|
||||
if strings.TrimSpace(loc) == "" {
|
||||
continue
|
||||
}
|
||||
lang, ok := compact.FromTag(language.MustParse(loc))
|
||||
if !ok {
|
||||
log.Printf("No compact index for locale %q", loc)
|
||||
}
|
||||
langMap[lang] = byte(len(index) - 1)
|
||||
}
|
||||
index = append(index, byte(len(rules)))
|
||||
}
|
||||
w.WriteVar(plurals.Type+"Rules", rules)
|
||||
w.WriteVar(plurals.Type+"Index", index)
|
||||
// Expand the values: first by using the parent relationship.
|
||||
langToIndex := make([]byte, compact.NumCompactTags)
|
||||
for i := range langToIndex {
|
||||
for p := compact.ID(i); ; p = p.Parent() {
|
||||
if x, ok := langMap[p]; ok {
|
||||
langToIndex[i] = x
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now expand by including entries with identical languages for which
|
||||
// one isn't set.
|
||||
for i, v := range langToIndex {
|
||||
if v == 0 {
|
||||
id, _ := compact.FromTag(language.Tag{
|
||||
LangID: compact.ID(i).Tag().LangID,
|
||||
})
|
||||
if p := langToIndex[id]; p != 0 {
|
||||
langToIndex[i] = p
|
||||
}
|
||||
}
|
||||
}
|
||||
w.WriteVar(plurals.Type+"LangToIndex", langToIndex)
|
||||
// Need to convert array to slice because of golang.org/issue/7651.
|
||||
// This will allow tables to be dropped when unused. This is especially
|
||||
// relevant for the ordinal data, which I suspect won't be used as much.
|
||||
w.WriteVar(plurals.Type+"InclusionMasks", inclusionMasks[:])
|
||||
|
||||
if len(rules) > 0xFF {
|
||||
log.Fatalf("Too many entries for rules: %#x", len(rules))
|
||||
}
|
||||
if len(index) > 0xFF {
|
||||
log.Fatalf("Too many entries for index: %#x", len(index))
|
||||
}
|
||||
if len(setMap) > 64 { // maximum number of bits.
|
||||
log.Fatalf("Too many entries for setMap: %d", len(setMap))
|
||||
}
|
||||
w.WriteComment(
|
||||
"Slots used for %s: %X of 0xFF rules; %X of 0xFF indexes; %d of 64 sets",
|
||||
plurals.Type, len(rules), len(index), len(setMap))
|
||||
// Prevent comment from attaching to the next entry.
|
||||
fmt.Fprint(w, "\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
type orCondition struct {
|
||||
original string // for debugging
|
||||
|
||||
form Form
|
||||
used [32]bool
|
||||
set [32][numN]bool
|
||||
}
|
||||
|
||||
func (o *orCondition) add(op opID, mod int, v []int) (ok bool) {
|
||||
ok = true
|
||||
for _, x := range v {
|
||||
if x >= maxMod {
|
||||
ok = false
|
||||
break
|
||||
}
|
||||
}
|
||||
for i := 0; i < numN; i++ {
|
||||
m := i
|
||||
if mod != 0 {
|
||||
m = i % mod
|
||||
}
|
||||
if !intIn(m, v) {
|
||||
o.set[op][i] = false
|
||||
}
|
||||
}
|
||||
if ok {
|
||||
o.used[op] = true
|
||||
}
|
||||
return ok
|
||||
}
|
||||
|
||||
// intIn reports whether x occurs in a.
func intIn(x int, a []int) bool {
	for i := 0; i < len(a); i++ {
		if a[i] == x {
			return true
		}
	}
	return false
}
|
||||
|
||||
var operandIndex = map[string]opID{
|
||||
"i": opI,
|
||||
"n": opN,
|
||||
"f": opF,
|
||||
"v": opV,
|
||||
"w": opW,
|
||||
}
|
||||
|
||||
// parsePluralCondition parses the condition of a single pluralRule and appends
|
||||
// the resulting or conditions to conds.
|
||||
//
|
||||
// Example rules:
|
||||
// // Category "one" in English: only allow 1 with no visible fraction
|
||||
// i = 1 and v = 0 @integer 1
|
||||
//
|
||||
// // Category "few" in Czech: all numbers with visible fractions
|
||||
// v != 0 @decimal ...
|
||||
//
|
||||
// // Category "zero" in Latvian: all multiples of 10 or the numbers 11-19 or
|
||||
// // numbers with a fraction 11..19 and no trailing zeros.
|
||||
// n % 10 = 0 or n % 100 = 11..19 or v = 2 and f % 100 = 11..19 @integer ...
|
||||
//
|
||||
// @integer and @decimal are followed by examples and are not relevant for the
|
||||
// rule itself. The are used here to signal the termination of the rule.
|
||||
func parsePluralCondition(conds []orCondition, s string, f Form) []orCondition {
|
||||
scan := bufio.NewScanner(strings.NewReader(s))
|
||||
scan.Split(splitTokens)
|
||||
for {
|
||||
cond := orCondition{original: s, form: f}
|
||||
// Set all numbers to be allowed for all number classes and restrict
|
||||
// from here on.
|
||||
for i := range cond.set {
|
||||
for j := range cond.set[i] {
|
||||
cond.set[i][j] = true
|
||||
}
|
||||
}
|
||||
andLoop:
|
||||
for {
|
||||
var token string
|
||||
scan.Scan() // Must exist.
|
||||
switch class := scan.Text(); class {
|
||||
case "t":
|
||||
class = "w" // equal to w for t == 0
|
||||
fallthrough
|
||||
case "n", "i", "f", "v", "w":
|
||||
op := scanToken(scan)
|
||||
opCode := operandIndex[class]
|
||||
mod := 0
|
||||
if op == "%" {
|
||||
opCode |= opMod
|
||||
|
||||
switch v := scanUint(scan); v {
|
||||
case 10, 100:
|
||||
mod = v
|
||||
case 1000:
|
||||
// A more general solution would be to allow checking
|
||||
// against multiples of 100 and include entries for the
|
||||
// numbers 100..900 in the inclusion masks. At the
|
||||
// moment this would only help Azerbaijan and Italian.
|
||||
|
||||
// Italian doesn't use '%', so this must be Azerbaijan.
|
||||
cond.used[opAzerbaijan00s] = true
|
||||
return append(conds, cond)
|
||||
|
||||
case 1000000:
|
||||
cond.used[opBretonM] = true
|
||||
return append(conds, cond)
|
||||
|
||||
default:
|
||||
log.Fatalf("Modulo value not supported %d", v)
|
||||
}
|
||||
op = scanToken(scan)
|
||||
}
|
||||
if op != "=" && op != "!=" {
|
||||
log.Fatalf("Unexpected op %q", op)
|
||||
}
|
||||
if op == "!=" {
|
||||
opCode |= opNotEqual
|
||||
}
|
||||
a := []int{}
|
||||
v := scanUint(scan)
|
||||
if class == "w" && v != 0 {
|
||||
log.Fatalf("Must compare against zero for operand type %q", class)
|
||||
}
|
||||
token = scanToken(scan)
|
||||
for {
|
||||
switch token {
|
||||
case "..":
|
||||
end := scanUint(scan)
|
||||
for ; v <= end; v++ {
|
||||
a = append(a, v)
|
||||
}
|
||||
token = scanToken(scan)
|
||||
default: // ",", "or", "and", "@..."
|
||||
a = append(a, v)
|
||||
}
|
||||
if token != "," {
|
||||
break
|
||||
}
|
||||
v = scanUint(scan)
|
||||
token = scanToken(scan)
|
||||
}
|
||||
if !cond.add(opCode, mod, a) {
|
||||
// Detected large numbers. As we ruled out Azerbaijan, this
|
||||
// must be the many rule for Italian ordinals.
|
||||
cond.set[opItalian800] = cond.set[opN]
|
||||
cond.used[opItalian800] = true
|
||||
}
|
||||
|
||||
case "@integer", "@decimal": // "other" entry: tests only.
|
||||
return conds
|
||||
default:
|
||||
log.Fatalf("Unexpected operand class %q (%s)", class, s)
|
||||
}
|
||||
switch token {
|
||||
case "or":
|
||||
conds = append(conds, cond)
|
||||
break andLoop
|
||||
case "@integer", "@decimal": // examples
|
||||
// There is always an example in practice, so we always terminate here.
|
||||
if err := scan.Err(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
return append(conds, cond)
|
||||
case "and":
|
||||
// keep accumulating
|
||||
default:
|
||||
log.Fatalf("Unexpected token %q", token)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// scanToken advances scan by one token and returns that token's text. The
// result of the advance is deliberately not checked; callers validate the
// returned text instead.
func scanToken(scan *bufio.Scanner) string {
	_ = scan.Scan()
	return scan.Text()
}
|
||||
|
||||
// scanUint advances scan by one token and returns it parsed as a base-10
// unsigned integer of at most 32 bits. A token that does not parse terminates
// the program through log.Fatal.
func scanUint(scan *bufio.Scanner) int {
	scan.Scan()
	text := scan.Text()
	n, err := strconv.ParseUint(text, 10, 32)
	if err != nil {
		log.Fatal(err)
	}
	return int(n)
}
|
||||
|
||||
// splitTokens can be used with bufio.Scanner to tokenize CLDR plural rules.
//
// It splits on white space like bufio.ScanWords and additionally separates the
// operators "..", ",", "!=" and "=" from text they are attached to, so that
// "2..4," is reported as the tokens "2", "..", "4" and ",".
//
// The returned advance is always relative to the start of data, including any
// leading white space that precedes the token.
func splitTokens(data []byte, atEOF bool) (advance int, token []byte, err error) {
	condTokens := [][]byte{
		[]byte(".."),
		[]byte(","),
		[]byte("!="),
		[]byte("="),
	}
	advance, token, err = bufio.ScanWords(data, atEOF)
	if len(token) == 0 {
		// No word yet (more data requested) or an error: nothing to split.
		return advance, token, err
	}

	// ScanWords skips leading white space, so the word may start after
	// data[0]. Offsets within the word must be shifted by that amount before
	// they can be used as advance; otherwise only the white space would be
	// consumed and part of the word would be scanned again. The word contains
	// no white space, so its first occurrence in data is its actual position.
	start := bytes.Index(data, token)

	for _, t := range condTokens {
		if len(t) >= len(token) {
			continue
		}
		switch p := bytes.Index(token, t); {
		case p == -1:
			// Operator not present in this word.
		case p == 0:
			// The word starts with the operator: report the operator alone.
			return start + len(t), token[:len(t)], err
		default:
			// Don't split when "=" overlaps "!=".
			if t[0] == '=' && token[p-1] == '!' {
				continue
			}
			// Report only the text in front of the operator.
			advance = start + p
			token = token[:p]
		}
	}
	return advance, token, err
}
|
@ -0,0 +1,74 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// Form defines a plural form.
|
||||
//
|
||||
// Not all languages support all forms. Also, the meaning of each form varies
|
||||
// per language. It is important to note that the name of a form does not
|
||||
// necessarily correspond one-to-one with the set of numbers. For instance,
|
||||
// for Croatian, One matches not only 1, but also 11, 21, etc.
|
||||
//
|
||||
// Each language must at least support the form "other".
|
||||
type Form byte
|
||||
|
||||
const (
|
||||
Other Form = iota
|
||||
Zero
|
||||
One
|
||||
Two
|
||||
Few
|
||||
Many
|
||||
)
|
||||
|
||||
var countMap = map[string]Form{
|
||||
"other": Other,
|
||||
"zero": Zero,
|
||||
"one": One,
|
||||
"two": Two,
|
||||
"few": Few,
|
||||
"many": Many,
|
||||
}
|
||||
|
||||
type pluralCheck struct {
|
||||
// category:
|
||||
// 3..7: opID
|
||||
// 0..2: category
|
||||
cat byte
|
||||
setID byte
|
||||
}
|
||||
|
||||
// opID identifies the type of operand in the plural rule, being i, n or f.
|
||||
// (v, w, and t are treated as filters in our implementation.)
|
||||
type opID byte
|
||||
|
||||
const (
|
||||
opMod opID = 0x1 // is '%' used?
|
||||
opNotEqual opID = 0x2 // using "!=" to compare
|
||||
opI opID = 0 << 2 // integers after taking the absolute value
|
||||
opN opID = 1 << 2 // full number (must be integer)
|
||||
opF opID = 2 << 2 // fraction
|
||||
opV opID = 3 << 2 // number of visible digits
|
||||
opW opID = 4 << 2 // number of visible digits without trailing zeros
|
||||
opBretonM opID = 5 << 2 // hard-wired rule for Breton
|
||||
opItalian800 opID = 6 << 2 // hard-wired rule for Italian
|
||||
opAzerbaijan00s opID = 7 << 2 // hard-wired rule for Azerbaijan
|
||||
)
|
||||
const (
|
||||
// Use this plural form to indicate the next rule needs to match as well.
|
||||
// The last condition in the list will have the correct plural form.
|
||||
andNext = 0x7
|
||||
formMask = 0x7
|
||||
|
||||
opShift = 3
|
||||
|
||||
// numN indicates the maximum integer, or maximum mod value, for which we
|
||||
// have inclusion masks.
|
||||
numN = 100
|
||||
// The common denominator of the modulo that is taken.
|
||||
maxMod = 100
|
||||
)
|
@ -0,0 +1,244 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package plural
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"reflect"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/text/internal/catmsg"
|
||||
"golang.org/x/text/internal/number"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/message/catalog"
|
||||
)
|
||||
|
||||
// TODO: consider deleting this interface. Maybe VisibleDigits is always
|
||||
// sufficient and practical.
|
||||
|
||||
// Interface is used for types that can determine their own plural form.
|
||||
type Interface interface {
|
||||
// PluralForm reports the plural form for the given language of the
|
||||
// underlying value. It also returns the integer value. If the integer value
|
||||
// is larger than fits in n, PluralForm may return a value modulo
|
||||
// 10,000,000.
|
||||
PluralForm(t language.Tag, scale int) (f Form, n int)
|
||||
}
|
||||
|
||||
// Selectf returns the first case for which its selector is a match for the
|
||||
// arg-th substitution argument to a formatting call, formatting it as indicated
|
||||
// by format.
|
||||
//
|
||||
// The cases argument are pairs of selectors and messages. Selectors are of type
|
||||
// string or Form. Messages are of type string or catalog.Message. A selector
|
||||
// matches an argument if:
|
||||
// - it is "other" or Other
|
||||
// - it matches the plural form of the argument: "zero", "one", "two", "few",
|
||||
// or "many", or the equivalent Form
|
||||
// - it is of the form "=x" where x is an integer that matches the value of
|
||||
// the argument.
|
||||
// - it is of the form "<x" where x is an integer that is larger than the
|
||||
// argument.
|
||||
//
|
||||
// The format argument determines the formatting parameters for which to
|
||||
// determine the plural form. This is especially relevant for non-integer
|
||||
// values.
|
||||
//
|
||||
// The format string may be "", in which case a best-effort attempt is made to
|
||||
// find a reasonable representation on which to base the plural form. Examples
|
||||
// of format strings are:
|
||||
// - %.2f decimal with scale 2
|
||||
// - %.2e scientific notation with precision 3 (scale + 1)
|
||||
// - %d integer
|
||||
func Selectf(arg int, format string, cases ...interface{}) catalog.Message {
|
||||
var p parser
|
||||
// Intercept the formatting parameters of format by doing a dummy print.
|
||||
fmt.Fprintf(ioutil.Discard, format, &p)
|
||||
m := &message{arg, kindDefault, 0, cases}
|
||||
switch p.verb {
|
||||
case 'g':
|
||||
m.kind = kindPrecision
|
||||
m.scale = p.scale
|
||||
case 'f':
|
||||
m.kind = kindScale
|
||||
m.scale = p.scale
|
||||
case 'e':
|
||||
m.kind = kindScientific
|
||||
m.scale = p.scale
|
||||
case 'd':
|
||||
m.kind = kindScale
|
||||
m.scale = 0
|
||||
default:
|
||||
// TODO: do we need to handle errors?
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// parser is a fmt.Formatter that prints nothing and instead records how it was
// asked to be formatted.
type parser struct {
	verb  rune // verb of the last Format call
	scale int  // precision of the last Format call, or -1 if none was given
}

// Format implements fmt.Formatter. It stores verb and the precision of s in p;
// a missing precision is stored as -1. Nothing is written to s.
func (p *parser) Format(s fmt.State, verb rune) {
	scale, ok := s.Precision()
	if !ok {
		scale = -1
	}
	p.verb, p.scale = verb, scale
}
|
||||
|
||||
// message is the catalog message created by Selectf.
type message struct {
	// arg is the index of the substitution argument to base the selection on.
	arg int
	// kind is one of the kind* constants and tells how to interpret scale.
	kind int
	// scale is the number of digits that goes with kind; it is only encoded
	// for kinds other than kindDefault.
	scale int
	// cases alternates selectors (string or Form) and messages (string or
	// catalog.Message).
	cases []interface{}
}
|
||||
|
||||
const (
|
||||
// Start with non-ASCII to allow skipping values.
|
||||
kindDefault = 0x80 + iota
|
||||
kindScale // verb f, number of fraction digits follows
|
||||
kindScientific // verb e, number of fraction digits follows
|
||||
kindPrecision // verb g, number of significant digits follows
|
||||
)
|
||||
|
||||
var handle = catmsg.Register("golang.org/x/text/feature/plural:plural", execute)
|
||||
|
||||
func (m *message) Compile(e *catmsg.Encoder) error {
|
||||
e.EncodeMessageType(handle)
|
||||
|
||||
e.EncodeUint(uint64(m.arg))
|
||||
|
||||
e.EncodeUint(uint64(m.kind))
|
||||
if m.kind > kindDefault {
|
||||
e.EncodeUint(uint64(m.scale))
|
||||
}
|
||||
|
||||
forms := validForms(cardinal, e.Language())
|
||||
|
||||
for i := 0; i < len(m.cases); {
|
||||
if err := compileSelector(e, forms, m.cases[i]); err != nil {
|
||||
return err
|
||||
}
|
||||
if i++; i >= len(m.cases) {
|
||||
return fmt.Errorf("plural: no message defined for selector %v", m.cases[i-1])
|
||||
}
|
||||
var msg catalog.Message
|
||||
switch x := m.cases[i].(type) {
|
||||
case string:
|
||||
msg = catalog.String(x)
|
||||
case catalog.Message:
|
||||
msg = x
|
||||
default:
|
||||
return fmt.Errorf("plural: message of type %T; must be string or catalog.Message", x)
|
||||
}
|
||||
if err := e.EncodeMessage(msg); err != nil {
|
||||
return err
|
||||
}
|
||||
i++
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func compileSelector(e *catmsg.Encoder, valid []Form, selector interface{}) error {
|
||||
form := Other
|
||||
switch x := selector.(type) {
|
||||
case string:
|
||||
if x == "" {
|
||||
return fmt.Errorf("plural: empty selector")
|
||||
}
|
||||
if c := x[0]; c == '=' || c == '<' {
|
||||
val, err := strconv.ParseUint(x[1:], 10, 16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("plural: invalid number in selector %q: %v", selector, err)
|
||||
}
|
||||
e.EncodeUint(uint64(c))
|
||||
e.EncodeUint(val)
|
||||
return nil
|
||||
}
|
||||
var ok bool
|
||||
form, ok = countMap[x]
|
||||
if !ok {
|
||||
return fmt.Errorf("plural: invalid plural form %q", selector)
|
||||
}
|
||||
case Form:
|
||||
form = x
|
||||
default:
|
||||
return fmt.Errorf("plural: selector of type %T; want string or Form", selector)
|
||||
}
|
||||
|
||||
ok := false
|
||||
for _, f := range valid {
|
||||
if f == form {
|
||||
ok = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !ok {
|
||||
return fmt.Errorf("plural: form %q not supported for language %q", selector, e.Language())
|
||||
}
|
||||
e.EncodeUint(uint64(form))
|
||||
return nil
|
||||
}
|
||||
|
||||
func execute(d *catmsg.Decoder) bool {
|
||||
lang := d.Language()
|
||||
argN := int(d.DecodeUint())
|
||||
kind := int(d.DecodeUint())
|
||||
scale := -1 // default
|
||||
if kind > kindDefault {
|
||||
scale = int(d.DecodeUint())
|
||||
}
|
||||
form := Other
|
||||
n := -1
|
||||
if arg := d.Arg(argN); arg == nil {
|
||||
// Default to Other.
|
||||
} else if x, ok := arg.(number.VisibleDigits); ok {
|
||||
d := x.Digits(nil, lang, scale)
|
||||
form, n = cardinal.matchDisplayDigits(lang, &d)
|
||||
} else if x, ok := arg.(Interface); ok {
|
||||
// This covers lists and formatters from the number package.
|
||||
form, n = x.PluralForm(lang, scale)
|
||||
} else {
|
||||
var f number.Formatter
|
||||
switch kind {
|
||||
case kindScale:
|
||||
f.InitDecimal(lang)
|
||||
f.SetScale(scale)
|
||||
case kindScientific:
|
||||
f.InitScientific(lang)
|
||||
f.SetScale(scale)
|
||||
case kindPrecision:
|
||||
f.InitDecimal(lang)
|
||||
f.SetPrecision(scale)
|
||||
case kindDefault:
|
||||
// sensible default
|
||||
f.InitDecimal(lang)
|
||||
if k := reflect.TypeOf(arg).Kind(); reflect.Int <= k && k <= reflect.Uintptr {
|
||||
f.SetScale(0)
|
||||
} else {
|
||||
f.SetScale(2)
|
||||
}
|
||||
}
|
||||
var dec number.Decimal // TODO: buffer in Printer
|
||||
dec.Convert(f.RoundingContext, arg)
|
||||
v := number.FormatDigits(&dec, f.RoundingContext)
|
||||
if !v.NaN && !v.Inf {
|
||||
form, n = cardinal.matchDisplayDigits(d.Language(), &v)
|
||||
}
|
||||
}
|
||||
for !d.Done() {
|
||||
f := d.DecodeUint()
|
||||
if (f == '=' && n == int(d.DecodeUint())) ||
|
||||
(f == '<' && 0 <= n && n < int(d.DecodeUint())) ||
|
||||
form == Form(f) ||
|
||||
Other == Form(f) {
|
||||
return d.ExecuteMessage()
|
||||
}
|
||||
d.SkipMessage()
|
||||
}
|
||||
return false
|
||||
}
|
@ -0,0 +1,261 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_common.go
|
||||
|
||||
// Package plural provides utilities for handling linguistic plurals in text.
|
||||
//
|
||||
// The definitions in this package are based on the plural rule handling defined
|
||||
// in CLDR. See
|
||||
// https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules for
|
||||
// details.
|
||||
package plural
|
||||
|
||||
import (
|
||||
"golang.org/x/text/internal/language/compact"
|
||||
"golang.org/x/text/internal/number"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// Rules defines the plural rules for all languages for a certain plural type.
|
||||
//
|
||||
//
|
||||
// This package is UNDER CONSTRUCTION and its API may change.
|
||||
type Rules struct {
|
||||
rules []pluralCheck
|
||||
index []byte
|
||||
langToIndex []byte
|
||||
inclusionMasks []uint64
|
||||
}
|
||||
|
||||
var (
|
||||
// Cardinal defines the plural rules for numbers indicating quantities.
|
||||
Cardinal *Rules = cardinal
|
||||
|
||||
// Ordinal defines the plural rules for numbers indicating position
|
||||
// (first, second, etc.).
|
||||
Ordinal *Rules = ordinal
|
||||
|
||||
ordinal = &Rules{
|
||||
ordinalRules,
|
||||
ordinalIndex,
|
||||
ordinalLangToIndex,
|
||||
ordinalInclusionMasks[:],
|
||||
}
|
||||
|
||||
cardinal = &Rules{
|
||||
cardinalRules,
|
||||
cardinalIndex,
|
||||
cardinalLangToIndex,
|
||||
cardinalInclusionMasks[:],
|
||||
}
|
||||
)
|
||||
|
||||
// getIntApprox converts the digits in slice digits[start:end] to an integer
// according to the following rules:
//	- Let i be asInt(digits[start:end]), where out-of-range digits are assumed
//	  to be zero.
//	- The result is big if i has a non-zero digit in a position more
//	  significant than the last nMod positions, that is, if i >= 10^nMod.
//	- Otherwise the result is i.
//
// For example, if digits is {1, 2, 3} and start:end is 0:5, then the result
// for various values of nMod is:
//	- when nMod == 2, n == big
//	- when nMod == 3, n == big
//	- when nMod == 4, n == big
//	- when nMod == 5, n == 12300
//	- when nMod == 6, n == 12300
//	- when nMod == 7, n == 12300
func getIntApprox(digits []byte, start, end, nMod, big int) (n int) {
	// Positions before the first digit stand for leading zeros, which do not
	// contribute to the value.
	pos := start
	if pos < 0 {
		pos = 0
	}
	// avail is the end of the part of the range for which digits exist.
	avail := end
	if avail >= len(digits) {
		avail = len(digits)
	}

	// Any non-zero digit more significant than the last nMod positions makes
	// the number too big.
	if hi := end - nMod; hi > 0 {
		if hi > avail {
			hi = avail
		}
		if pos < hi {
			for _, d := range digits[pos:hi] {
				if d != 0 {
					return big
				}
			}
			pos = hi
		}
	}

	// Accumulate the digits that are present.
	if pos < avail {
		for _, d := range digits[pos:avail] {
			n = 10*n + int(d)
		}
		pos = avail
	}

	// Positions past the last digit stand for trailing zeros.
	for ; pos < end; pos++ {
		n *= 10
	}
	return n
}
|
||||
|
||||
// MatchDigits computes the plural form for the given language and the given
|
||||
// decimal floating point digits. The digits are stored in big-endian order and
|
||||
// are of value byte(0) - byte(9). The floating point position is indicated by
|
||||
// exp and the number of visible decimals is scale. All leading and trailing
|
||||
// zeros may be omitted from digits.
|
||||
//
|
||||
// The following table contains examples of possible arguments to represent
|
||||
// the given numbers.
|
||||
// decimal digits exp scale
|
||||
// 123 []byte{1, 2, 3} 3 0
|
||||
// 123.4 []byte{1, 2, 3, 4} 3 1
|
||||
// 123.40 []byte{1, 2, 3, 4} 3 2
|
||||
// 100000 []byte{1} 6 0
|
||||
// 100000.00 []byte{1} 6 3
|
||||
func (p *Rules) MatchDigits(t language.Tag, digits []byte, exp, scale int) Form {
|
||||
index := tagToID(t)
|
||||
|
||||
// Differentiate up to including mod 1000000 for the integer part.
|
||||
n := getIntApprox(digits, 0, exp, 6, 1000000)
|
||||
|
||||
// Differentiate up to including mod 100 for the fractional part.
|
||||
f := getIntApprox(digits, exp, exp+scale, 2, 100)
|
||||
|
||||
return matchPlural(p, index, n, f, scale)
|
||||
}
|
||||
|
||||
func (p *Rules) matchDisplayDigits(t language.Tag, d *number.Digits) (Form, int) {
|
||||
n := getIntApprox(d.Digits, 0, int(d.Exp), 6, 1000000)
|
||||
return p.MatchDigits(t, d.Digits, int(d.Exp), d.NumFracDigits()), n
|
||||
}
|
||||
|
||||
func validForms(p *Rules, t language.Tag) (forms []Form) {
|
||||
offset := p.langToIndex[tagToID(t)]
|
||||
rules := p.rules[p.index[offset]:p.index[offset+1]]
|
||||
|
||||
forms = append(forms, Other)
|
||||
last := Other
|
||||
for _, r := range rules {
|
||||
if cat := Form(r.cat & formMask); cat != andNext && last != cat {
|
||||
forms = append(forms, cat)
|
||||
last = cat
|
||||
}
|
||||
}
|
||||
return forms
|
||||
}
|
||||
|
||||
func (p *Rules) matchComponents(t language.Tag, n, f, scale int) Form {
|
||||
return matchPlural(p, tagToID(t), n, f, scale)
|
||||
}
|
||||
|
||||
// MatchPlural returns the plural form for the given language and plural
|
||||
// operands (as defined in
|
||||
// https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules):
|
||||
// where
|
||||
// n absolute value of the source number (integer and decimals)
|
||||
// input
|
||||
// i integer digits of n.
|
||||
// v number of visible fraction digits in n, with trailing zeros.
|
||||
// w number of visible fraction digits in n, without trailing zeros.
|
||||
// f visible fractional digits in n, with trailing zeros (f = t * 10^(v-w))
|
||||
// t visible fractional digits in n, without trailing zeros.
|
||||
//
|
||||
// If any of the operand values is too large to fit in an int, it is okay to
|
||||
// pass the value modulo 10,000,000.
|
||||
func (p *Rules) MatchPlural(lang language.Tag, i, v, w, f, t int) Form {
|
||||
return matchPlural(p, tagToID(lang), i, f, v)
|
||||
}
|
||||
|
||||
func matchPlural(p *Rules, index compact.ID, n, f, v int) Form {
|
||||
nMask := p.inclusionMasks[n%maxMod]
|
||||
// Compute the fMask inline in the rules below, as it is relatively rare.
|
||||
// fMask := p.inclusionMasks[f%maxMod]
|
||||
vMask := p.inclusionMasks[v%maxMod]
|
||||
|
||||
// Do the matching
|
||||
offset := p.langToIndex[index]
|
||||
rules := p.rules[p.index[offset]:p.index[offset+1]]
|
||||
for i := 0; i < len(rules); i++ {
|
||||
rule := rules[i]
|
||||
setBit := uint64(1 << rule.setID)
|
||||
var skip bool
|
||||
switch op := opID(rule.cat >> opShift); op {
|
||||
case opI: // i = x
|
||||
skip = n >= numN || nMask&setBit == 0
|
||||
|
||||
case opI | opNotEqual: // i != x
|
||||
skip = n < numN && nMask&setBit != 0
|
||||
|
||||
case opI | opMod: // i % m = x
|
||||
skip = nMask&setBit == 0
|
||||
|
||||
case opI | opMod | opNotEqual: // i % m != x
|
||||
skip = nMask&setBit != 0
|
||||
|
||||
case opN: // n = x
|
||||
skip = f != 0 || n >= numN || nMask&setBit == 0
|
||||
|
||||
case opN | opNotEqual: // n != x
|
||||
skip = f == 0 && n < numN && nMask&setBit != 0
|
||||
|
||||
case opN | opMod: // n % m = x
|
||||
skip = f != 0 || nMask&setBit == 0
|
||||
|
||||
case opN | opMod | opNotEqual: // n % m != x
|
||||
skip = f == 0 && nMask&setBit != 0
|
||||
|
||||
case opF: // f = x
|
||||
skip = f >= numN || p.inclusionMasks[f%maxMod]&setBit == 0
|
||||
|
||||
case opF | opNotEqual: // f != x
|
||||
skip = f < numN && p.inclusionMasks[f%maxMod]&setBit != 0
|
||||
|
||||
case opF | opMod: // f % m = x
|
||||
skip = p.inclusionMasks[f%maxMod]&setBit == 0
|
||||
|
||||
case opF | opMod | opNotEqual: // f % m != x
|
||||
skip = p.inclusionMasks[f%maxMod]&setBit != 0
|
||||
|
||||
case opV: // v = x
|
||||
skip = v < numN && vMask&setBit == 0
|
||||
|
||||
case opV | opNotEqual: // v != x
|
||||
skip = v < numN && vMask&setBit != 0
|
||||
|
||||
case opW: // w == 0
|
||||
skip = f != 0
|
||||
|
||||
case opW | opNotEqual: // w != 0
|
||||
skip = f == 0
|
||||
|
||||
// Hard-wired rules that cannot be handled by our algorithm.
|
||||
|
||||
case opBretonM:
|
||||
skip = f != 0 || n == 0 || n%1000000 != 0
|
||||
|
||||
case opAzerbaijan00s:
|
||||
// 100,200,300,400,500,600,700,800,900
|
||||
skip = n == 0 || n >= 1000 || n%100 != 0
|
||||
|
||||
case opItalian800:
|
||||
skip = (f != 0 || n >= numN || nMask&setBit == 0) && n != 800
|
||||
}
|
||||
if skip {
|
||||
// advance over AND entries.
|
||||
for ; i < len(rules) && rules[i].cat&formMask == andNext; i++ {
|
||||
}
|
||||
continue
|
||||
}
|
||||
// return if we have a final entry.
|
||||
if cat := rule.cat & formMask; cat != andNext {
|
||||
return Form(cat)
|
||||
}
|
||||
}
|
||||
return Other
|
||||
}
|
||||
|
||||
func tagToID(t language.Tag) compact.ID {
|
||||
id, _ := compact.RegionalID(compact.Tag(t))
|
||||
return id
|
||||
}
|
@ -0,0 +1,552 @@
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package plural
|
||||
|
||||
// CLDRVersion is the CLDR version from which the tables in this package are derived.
|
||||
const CLDRVersion = "32"
|
||||
|
||||
var ordinalRules = []pluralCheck{ // 64 elements
|
||||
0: {cat: 0x2f, setID: 0x4},
|
||||
1: {cat: 0x3a, setID: 0x5},
|
||||
2: {cat: 0x22, setID: 0x1},
|
||||
3: {cat: 0x22, setID: 0x6},
|
||||
4: {cat: 0x22, setID: 0x7},
|
||||
5: {cat: 0x2f, setID: 0x8},
|
||||
6: {cat: 0x3c, setID: 0x9},
|
||||
7: {cat: 0x2f, setID: 0xa},
|
||||
8: {cat: 0x3c, setID: 0xb},
|
||||
9: {cat: 0x2c, setID: 0xc},
|
||||
10: {cat: 0x24, setID: 0xd},
|
||||
11: {cat: 0x2d, setID: 0xe},
|
||||
12: {cat: 0x2d, setID: 0xf},
|
||||
13: {cat: 0x2f, setID: 0x10},
|
||||
14: {cat: 0x35, setID: 0x3},
|
||||
15: {cat: 0xc5, setID: 0x11},
|
||||
16: {cat: 0x2, setID: 0x1},
|
||||
17: {cat: 0x5, setID: 0x3},
|
||||
18: {cat: 0xd, setID: 0x12},
|
||||
19: {cat: 0x22, setID: 0x1},
|
||||
20: {cat: 0x2f, setID: 0x13},
|
||||
21: {cat: 0x3d, setID: 0x14},
|
||||
22: {cat: 0x2f, setID: 0x15},
|
||||
23: {cat: 0x3a, setID: 0x16},
|
||||
24: {cat: 0x2f, setID: 0x17},
|
||||
25: {cat: 0x3b, setID: 0x18},
|
||||
26: {cat: 0x2f, setID: 0xa},
|
||||
27: {cat: 0x3c, setID: 0xb},
|
||||
28: {cat: 0x22, setID: 0x1},
|
||||
29: {cat: 0x23, setID: 0x19},
|
||||
30: {cat: 0x24, setID: 0x1a},
|
||||
31: {cat: 0x22, setID: 0x1b},
|
||||
32: {cat: 0x23, setID: 0x2},
|
||||
33: {cat: 0x24, setID: 0x1a},
|
||||
34: {cat: 0xf, setID: 0x15},
|
||||
35: {cat: 0x1a, setID: 0x16},
|
||||
36: {cat: 0xf, setID: 0x17},
|
||||
37: {cat: 0x1b, setID: 0x18},
|
||||
38: {cat: 0xf, setID: 0x1c},
|
||||
39: {cat: 0x1d, setID: 0x1d},
|
||||
40: {cat: 0xa, setID: 0x1e},
|
||||
41: {cat: 0xa, setID: 0x1f},
|
||||
42: {cat: 0xc, setID: 0x20},
|
||||
43: {cat: 0xe4, setID: 0x0},
|
||||
44: {cat: 0x5, setID: 0x3},
|
||||
45: {cat: 0xd, setID: 0xe},
|
||||
46: {cat: 0xd, setID: 0x21},
|
||||
47: {cat: 0x22, setID: 0x1},
|
||||
48: {cat: 0x23, setID: 0x19},
|
||||
49: {cat: 0x24, setID: 0x1a},
|
||||
50: {cat: 0x25, setID: 0x22},
|
||||
51: {cat: 0x22, setID: 0x23},
|
||||
52: {cat: 0x23, setID: 0x19},
|
||||
53: {cat: 0x24, setID: 0x1a},
|
||||
54: {cat: 0x25, setID: 0x22},
|
||||
55: {cat: 0x22, setID: 0x24},
|
||||
56: {cat: 0x23, setID: 0x19},
|
||||
57: {cat: 0x24, setID: 0x1a},
|
||||
58: {cat: 0x25, setID: 0x22},
|
||||
59: {cat: 0x21, setID: 0x25},
|
||||
60: {cat: 0x22, setID: 0x1},
|
||||
61: {cat: 0x23, setID: 0x2},
|
||||
62: {cat: 0x24, setID: 0x26},
|
||||
63: {cat: 0x25, setID: 0x27},
|
||||
} // Size: 152 bytes
|
||||
|
||||
var ordinalIndex = []uint8{ // 22 elements
|
||||
0x00, 0x00, 0x02, 0x03, 0x04, 0x05, 0x07, 0x09,
|
||||
0x0b, 0x0f, 0x10, 0x13, 0x16, 0x1c, 0x1f, 0x22,
|
||||
0x28, 0x2f, 0x33, 0x37, 0x3b, 0x40,
|
||||
} // Size: 46 bytes
|
||||
|
||||
var ordinalLangToIndex = []uint8{ // 775 elements
|
||||
// Entry 0 - 3F
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x12, 0x12, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10,
|
||||
0x10, 0x10, 0x10, 0x00, 0x00, 0x05, 0x05, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 40 - 7F
|
||||
0x12, 0x12, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e,
|
||||
0x0e, 0x0e, 0x0e, 0x0e, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x14, 0x14, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 80 - BF
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x0c,
|
||||
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
|
||||
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
|
||||
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
|
||||
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
|
||||
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
|
||||
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
|
||||
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
|
||||
// Entry C0 - FF
|
||||
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
|
||||
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
|
||||
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
|
||||
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
|
||||
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
|
||||
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 100 - 13F
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02,
|
||||
0x00, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
|
||||
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
|
||||
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
|
||||
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
|
||||
// Entry 140 - 17F
|
||||
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
|
||||
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
|
||||
0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x11, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11,
|
||||
0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03,
|
||||
0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 180 - 1BF
|
||||
0x00, 0x00, 0x00, 0x00, 0x09, 0x09, 0x09, 0x09,
|
||||
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x0a, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 1C0 - 1FF
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0f, 0x0f, 0x00, 0x00,
|
||||
0x00, 0x00, 0x02, 0x0d, 0x0d, 0x02, 0x02, 0x02,
|
||||
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 200 - 23F
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x13, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 240 - 27F
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
|
||||
0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 280 - 2BF
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x0b, 0x0b, 0x0b, 0x0b, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x07, 0x07, 0x02, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 2C0 - 2FF
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 300 - 33F
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x0c,
|
||||
} // Size: 799 bytes
|
||||
|
||||
var ordinalInclusionMasks = []uint64{ // 100 elements
|
||||
// Entry 0 - 1F
|
||||
0x0000002000010009, 0x00000018482000d3, 0x0000000042840195, 0x000000410a040581,
|
||||
0x00000041040c0081, 0x0000009840040041, 0x0000008400045001, 0x0000003850040001,
|
||||
0x0000003850060001, 0x0000003800049001, 0x0000000800052001, 0x0000000040660031,
|
||||
0x0000000041840331, 0x0000000100040f01, 0x00000001001c0001, 0x0000000040040001,
|
||||
0x0000000000045001, 0x0000000070040001, 0x0000000070040001, 0x0000000000049001,
|
||||
0x0000000080050001, 0x0000000040200011, 0x0000000040800111, 0x0000000100000501,
|
||||
0x0000000100080001, 0x0000000040000001, 0x0000000000005001, 0x0000000050000001,
|
||||
0x0000000050000001, 0x0000000000009001, 0x0000000000010001, 0x0000000040200011,
|
||||
// Entry 20 - 3F
|
||||
0x0000000040800111, 0x0000000100000501, 0x0000000100080001, 0x0000000040000001,
|
||||
0x0000000000005001, 0x0000000050000001, 0x0000000050000001, 0x0000000000009001,
|
||||
0x0000000200050001, 0x0000000040200011, 0x0000000040800111, 0x0000000100000501,
|
||||
0x0000000100080001, 0x0000000040000001, 0x0000000000005001, 0x0000000050000001,
|
||||
0x0000000050000001, 0x0000000000009001, 0x0000000080010001, 0x0000000040200011,
|
||||
0x0000000040800111, 0x0000000100000501, 0x0000000100080001, 0x0000000040000001,
|
||||
0x0000000000005001, 0x0000000050000001, 0x0000000050000001, 0x0000000000009001,
|
||||
0x0000000200050001, 0x0000000040200011, 0x0000000040800111, 0x0000000100000501,
|
||||
// Entry 40 - 5F
|
||||
0x0000000100080001, 0x0000000040000001, 0x0000000000005001, 0x0000000050000001,
|
||||
0x0000000050000001, 0x0000000000009001, 0x0000000080010001, 0x0000000040200011,
|
||||
0x0000000040800111, 0x0000000100000501, 0x0000000100080001, 0x0000000040000001,
|
||||
0x0000000000005001, 0x0000000050000001, 0x0000000050000001, 0x0000000000009001,
|
||||
0x0000000080070001, 0x0000000040200011, 0x0000000040800111, 0x0000000100000501,
|
||||
0x0000000100080001, 0x0000000040000001, 0x0000000000005001, 0x0000000050000001,
|
||||
0x0000000050000001, 0x0000000000009001, 0x0000000200010001, 0x0000000040200011,
|
||||
0x0000000040800111, 0x0000000100000501, 0x0000000100080001, 0x0000000040000001,
|
||||
// Entry 60 - 7F
|
||||
0x0000000000005001, 0x0000000050000001, 0x0000000050000001, 0x0000000000009001,
|
||||
} // Size: 824 bytes
|
||||
|
||||
// Slots used for ordinal: 40 of 0xFF rules; 16 of 0xFF indexes; 40 of 64 sets
|
||||
|
||||
var cardinalRules = []pluralCheck{ // 166 elements
|
||||
0: {cat: 0x2, setID: 0x3},
|
||||
1: {cat: 0x22, setID: 0x1},
|
||||
2: {cat: 0x2, setID: 0x4},
|
||||
3: {cat: 0x2, setID: 0x4},
|
||||
4: {cat: 0x7, setID: 0x1},
|
||||
5: {cat: 0x62, setID: 0x3},
|
||||
6: {cat: 0x22, setID: 0x4},
|
||||
7: {cat: 0x7, setID: 0x3},
|
||||
8: {cat: 0x42, setID: 0x1},
|
||||
9: {cat: 0x22, setID: 0x4},
|
||||
10: {cat: 0x22, setID: 0x4},
|
||||
11: {cat: 0x22, setID: 0x5},
|
||||
12: {cat: 0x22, setID: 0x1},
|
||||
13: {cat: 0x22, setID: 0x1},
|
||||
14: {cat: 0x7, setID: 0x4},
|
||||
15: {cat: 0x92, setID: 0x3},
|
||||
16: {cat: 0xf, setID: 0x6},
|
||||
17: {cat: 0x1f, setID: 0x7},
|
||||
18: {cat: 0x82, setID: 0x3},
|
||||
19: {cat: 0x92, setID: 0x3},
|
||||
20: {cat: 0xf, setID: 0x6},
|
||||
21: {cat: 0x62, setID: 0x3},
|
||||
22: {cat: 0x4a, setID: 0x6},
|
||||
23: {cat: 0x7, setID: 0x8},
|
||||
24: {cat: 0x62, setID: 0x3},
|
||||
25: {cat: 0x1f, setID: 0x9},
|
||||
26: {cat: 0x62, setID: 0x3},
|
||||
27: {cat: 0x5f, setID: 0x9},
|
||||
28: {cat: 0x72, setID: 0x3},
|
||||
29: {cat: 0x29, setID: 0xa},
|
||||
30: {cat: 0x29, setID: 0xb},
|
||||
31: {cat: 0x4f, setID: 0xb},
|
||||
32: {cat: 0x61, setID: 0x2},
|
||||
33: {cat: 0x2f, setID: 0x6},
|
||||
34: {cat: 0x3a, setID: 0x7},
|
||||
35: {cat: 0x4f, setID: 0x6},
|
||||
36: {cat: 0x5f, setID: 0x7},
|
||||
37: {cat: 0x62, setID: 0x2},
|
||||
38: {cat: 0x4f, setID: 0x6},
|
||||
39: {cat: 0x72, setID: 0x2},
|
||||
40: {cat: 0x21, setID: 0x3},
|
||||
41: {cat: 0x7, setID: 0x4},
|
||||
42: {cat: 0x32, setID: 0x3},
|
||||
43: {cat: 0x21, setID: 0x3},
|
||||
44: {cat: 0x22, setID: 0x1},
|
||||
45: {cat: 0x22, setID: 0x1},
|
||||
46: {cat: 0x23, setID: 0x2},
|
||||
47: {cat: 0x2, setID: 0x3},
|
||||
48: {cat: 0x22, setID: 0x1},
|
||||
49: {cat: 0x24, setID: 0xc},
|
||||
50: {cat: 0x7, setID: 0x1},
|
||||
51: {cat: 0x62, setID: 0x3},
|
||||
52: {cat: 0x74, setID: 0x3},
|
||||
53: {cat: 0x24, setID: 0x3},
|
||||
54: {cat: 0x2f, setID: 0xd},
|
||||
55: {cat: 0x34, setID: 0x1},
|
||||
56: {cat: 0xf, setID: 0x6},
|
||||
57: {cat: 0x1f, setID: 0x7},
|
||||
58: {cat: 0x62, setID: 0x3},
|
||||
59: {cat: 0x4f, setID: 0x6},
|
||||
60: {cat: 0x5a, setID: 0x7},
|
||||
61: {cat: 0xf, setID: 0xe},
|
||||
62: {cat: 0x1f, setID: 0xf},
|
||||
63: {cat: 0x64, setID: 0x3},
|
||||
64: {cat: 0x4f, setID: 0xe},
|
||||
65: {cat: 0x5c, setID: 0xf},
|
||||
66: {cat: 0x22, setID: 0x10},
|
||||
67: {cat: 0x23, setID: 0x11},
|
||||
68: {cat: 0x24, setID: 0x12},
|
||||
69: {cat: 0xf, setID: 0x1},
|
||||
70: {cat: 0x62, setID: 0x3},
|
||||
71: {cat: 0xf, setID: 0x2},
|
||||
72: {cat: 0x63, setID: 0x3},
|
||||
73: {cat: 0xf, setID: 0x13},
|
||||
74: {cat: 0x64, setID: 0x3},
|
||||
75: {cat: 0x74, setID: 0x3},
|
||||
76: {cat: 0xf, setID: 0x1},
|
||||
77: {cat: 0x62, setID: 0x3},
|
||||
78: {cat: 0x4a, setID: 0x1},
|
||||
79: {cat: 0xf, setID: 0x2},
|
||||
80: {cat: 0x63, setID: 0x3},
|
||||
81: {cat: 0x4b, setID: 0x2},
|
||||
82: {cat: 0xf, setID: 0x13},
|
||||
83: {cat: 0x64, setID: 0x3},
|
||||
84: {cat: 0x4c, setID: 0x13},
|
||||
85: {cat: 0x7, setID: 0x1},
|
||||
86: {cat: 0x62, setID: 0x3},
|
||||
87: {cat: 0x7, setID: 0x2},
|
||||
88: {cat: 0x63, setID: 0x3},
|
||||
89: {cat: 0x2f, setID: 0xa},
|
||||
90: {cat: 0x37, setID: 0x14},
|
||||
91: {cat: 0x65, setID: 0x3},
|
||||
92: {cat: 0x7, setID: 0x1},
|
||||
93: {cat: 0x62, setID: 0x3},
|
||||
94: {cat: 0x7, setID: 0x15},
|
||||
95: {cat: 0x64, setID: 0x3},
|
||||
96: {cat: 0x75, setID: 0x3},
|
||||
97: {cat: 0x7, setID: 0x1},
|
||||
98: {cat: 0x62, setID: 0x3},
|
||||
99: {cat: 0xf, setID: 0xe},
|
||||
100: {cat: 0x1f, setID: 0xf},
|
||||
101: {cat: 0x64, setID: 0x3},
|
||||
102: {cat: 0xf, setID: 0x16},
|
||||
103: {cat: 0x17, setID: 0x1},
|
||||
104: {cat: 0x65, setID: 0x3},
|
||||
105: {cat: 0xf, setID: 0x17},
|
||||
106: {cat: 0x65, setID: 0x3},
|
||||
107: {cat: 0xf, setID: 0xf},
|
||||
108: {cat: 0x65, setID: 0x3},
|
||||
109: {cat: 0x2f, setID: 0x6},
|
||||
110: {cat: 0x3a, setID: 0x7},
|
||||
111: {cat: 0x2f, setID: 0xe},
|
||||
112: {cat: 0x3c, setID: 0xf},
|
||||
113: {cat: 0x2d, setID: 0xa},
|
||||
114: {cat: 0x2d, setID: 0x17},
|
||||
115: {cat: 0x2d, setID: 0x18},
|
||||
116: {cat: 0x2f, setID: 0x6},
|
||||
117: {cat: 0x3a, setID: 0xb},
|
||||
118: {cat: 0x2f, setID: 0x19},
|
||||
119: {cat: 0x3c, setID: 0xb},
|
||||
120: {cat: 0x55, setID: 0x3},
|
||||
121: {cat: 0x22, setID: 0x1},
|
||||
122: {cat: 0x24, setID: 0x3},
|
||||
123: {cat: 0x2c, setID: 0xc},
|
||||
124: {cat: 0x2d, setID: 0xb},
|
||||
125: {cat: 0xf, setID: 0x6},
|
||||
126: {cat: 0x1f, setID: 0x7},
|
||||
127: {cat: 0x62, setID: 0x3},
|
||||
128: {cat: 0xf, setID: 0xe},
|
||||
129: {cat: 0x1f, setID: 0xf},
|
||||
130: {cat: 0x64, setID: 0x3},
|
||||
131: {cat: 0xf, setID: 0xa},
|
||||
132: {cat: 0x65, setID: 0x3},
|
||||
133: {cat: 0xf, setID: 0x17},
|
||||
134: {cat: 0x65, setID: 0x3},
|
||||
135: {cat: 0xf, setID: 0x18},
|
||||
136: {cat: 0x65, setID: 0x3},
|
||||
137: {cat: 0x2f, setID: 0x6},
|
||||
138: {cat: 0x3a, setID: 0x1a},
|
||||
139: {cat: 0x2f, setID: 0x1b},
|
||||
140: {cat: 0x3b, setID: 0x1c},
|
||||
141: {cat: 0x2f, setID: 0x1d},
|
||||
142: {cat: 0x3c, setID: 0x1e},
|
||||
143: {cat: 0x37, setID: 0x3},
|
||||
144: {cat: 0xa5, setID: 0x0},
|
||||
145: {cat: 0x22, setID: 0x1},
|
||||
146: {cat: 0x23, setID: 0x2},
|
||||
147: {cat: 0x24, setID: 0x1f},
|
||||
148: {cat: 0x25, setID: 0x20},
|
||||
149: {cat: 0xf, setID: 0x6},
|
||||
150: {cat: 0x62, setID: 0x3},
|
||||
151: {cat: 0xf, setID: 0x1b},
|
||||
152: {cat: 0x63, setID: 0x3},
|
||||
153: {cat: 0xf, setID: 0x21},
|
||||
154: {cat: 0x64, setID: 0x3},
|
||||
155: {cat: 0x75, setID: 0x3},
|
||||
156: {cat: 0x21, setID: 0x3},
|
||||
157: {cat: 0x22, setID: 0x1},
|
||||
158: {cat: 0x23, setID: 0x2},
|
||||
159: {cat: 0x2c, setID: 0x22},
|
||||
160: {cat: 0x2d, setID: 0x5},
|
||||
161: {cat: 0x21, setID: 0x3},
|
||||
162: {cat: 0x22, setID: 0x1},
|
||||
163: {cat: 0x23, setID: 0x2},
|
||||
164: {cat: 0x24, setID: 0x23},
|
||||
165: {cat: 0x25, setID: 0x24},
|
||||
} // Size: 356 bytes
|
||||
|
||||
var cardinalIndex = []uint8{ // 36 elements
|
||||
0x00, 0x00, 0x02, 0x03, 0x04, 0x06, 0x09, 0x0a,
|
||||
0x0c, 0x0d, 0x10, 0x14, 0x17, 0x1d, 0x28, 0x2b,
|
||||
0x2d, 0x2f, 0x32, 0x38, 0x42, 0x45, 0x4c, 0x55,
|
||||
0x5c, 0x61, 0x6d, 0x74, 0x79, 0x7d, 0x89, 0x91,
|
||||
0x95, 0x9c, 0xa1, 0xa6,
|
||||
} // Size: 60 bytes
|
||||
|
||||
var cardinalLangToIndex = []uint8{ // 775 elements
|
||||
// Entry 0 - 3F
|
||||
0x00, 0x08, 0x08, 0x08, 0x00, 0x00, 0x06, 0x06,
|
||||
0x01, 0x01, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
|
||||
0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
|
||||
0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
|
||||
0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
|
||||
0x01, 0x01, 0x08, 0x08, 0x04, 0x04, 0x08, 0x08,
|
||||
0x08, 0x08, 0x08, 0x00, 0x00, 0x1a, 0x1a, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x06, 0x00, 0x00,
|
||||
// Entry 40 - 7F
|
||||
0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x1e, 0x1e,
|
||||
0x08, 0x08, 0x13, 0x13, 0x13, 0x13, 0x13, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
||||
0x18, 0x18, 0x00, 0x00, 0x22, 0x22, 0x09, 0x09,
|
||||
0x09, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04, 0x04,
|
||||
0x04, 0x04, 0x04, 0x00, 0x00, 0x16, 0x16, 0x00,
|
||||
0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 80 - BF
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x04, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
|
||||
// Entry C0 - FF
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
||||
// Entry 100 - 13F
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x04, 0x04,
|
||||
0x08, 0x08, 0x00, 0x00, 0x01, 0x01, 0x01, 0x02,
|
||||
0x02, 0x02, 0x02, 0x02, 0x04, 0x04, 0x0c, 0x0c,
|
||||
0x08, 0x08, 0x08, 0x02, 0x02, 0x02, 0x02, 0x02,
|
||||
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
|
||||
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
|
||||
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
|
||||
// Entry 140 - 17F
|
||||
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
|
||||
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
|
||||
0x02, 0x02, 0x08, 0x08, 0x04, 0x04, 0x1f, 0x1f,
|
||||
0x14, 0x14, 0x04, 0x04, 0x08, 0x08, 0x08, 0x08,
|
||||
0x01, 0x01, 0x06, 0x00, 0x00, 0x20, 0x20, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x17, 0x17, 0x01,
|
||||
0x01, 0x13, 0x13, 0x13, 0x16, 0x16, 0x08, 0x08,
|
||||
0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 180 - 1BF
|
||||
0x00, 0x04, 0x0a, 0x0a, 0x04, 0x04, 0x04, 0x04,
|
||||
0x04, 0x10, 0x17, 0x00, 0x00, 0x00, 0x08, 0x08,
|
||||
0x04, 0x08, 0x08, 0x00, 0x00, 0x08, 0x08, 0x02,
|
||||
0x02, 0x08, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x08,
|
||||
0x08, 0x08, 0x08, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08,
|
||||
0x08, 0x08, 0x00, 0x00, 0x0f, 0x0f, 0x08, 0x10,
|
||||
// Entry 1C0 - 1FF
|
||||
0x10, 0x08, 0x08, 0x0e, 0x0e, 0x08, 0x08, 0x08,
|
||||
0x08, 0x00, 0x00, 0x06, 0x06, 0x06, 0x06, 0x06,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x1b, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x0d, 0x08,
|
||||
0x08, 0x08, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06,
|
||||
0x00, 0x00, 0x08, 0x08, 0x0b, 0x0b, 0x08, 0x08,
|
||||
0x08, 0x08, 0x12, 0x01, 0x01, 0x00, 0x00, 0x00,
|
||||
0x00, 0x1c, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 200 - 23F
|
||||
0x00, 0x08, 0x10, 0x10, 0x08, 0x08, 0x08, 0x08,
|
||||
0x08, 0x00, 0x00, 0x00, 0x08, 0x08, 0x08, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x00,
|
||||
0x00, 0x08, 0x08, 0x08, 0x08, 0x08, 0x00, 0x08,
|
||||
0x06, 0x00, 0x00, 0x08, 0x08, 0x08, 0x08, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x06, 0x06,
|
||||
0x06, 0x06, 0x06, 0x08, 0x19, 0x19, 0x0d, 0x0d,
|
||||
0x08, 0x08, 0x03, 0x04, 0x03, 0x04, 0x04, 0x04,
|
||||
// Entry 240 - 27F
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x00,
|
||||
0x00, 0x00, 0x00, 0x08, 0x08, 0x00, 0x00, 0x12,
|
||||
0x12, 0x12, 0x08, 0x08, 0x1d, 0x1d, 0x1d, 0x1d,
|
||||
0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x08, 0x08, 0x00,
|
||||
0x00, 0x08, 0x08, 0x00, 0x00, 0x08, 0x08, 0x08,
|
||||
0x10, 0x10, 0x10, 0x10, 0x08, 0x08, 0x00, 0x00,
|
||||
0x00, 0x00, 0x13, 0x11, 0x11, 0x11, 0x11, 0x11,
|
||||
0x05, 0x05, 0x18, 0x18, 0x15, 0x15, 0x10, 0x10,
|
||||
// Entry 280 - 2BF
|
||||
0x10, 0x10, 0x10, 0x10, 0x08, 0x08, 0x08, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x13,
|
||||
0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13,
|
||||
0x13, 0x13, 0x08, 0x08, 0x08, 0x04, 0x04, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x08, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
||||
0x08, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x06,
|
||||
0x08, 0x08, 0x08, 0x0c, 0x08, 0x00, 0x00, 0x08,
|
||||
// Entry 2C0 - 2FF
|
||||
0x08, 0x08, 0x08, 0x00, 0x00, 0x00, 0x00, 0x07,
|
||||
0x07, 0x08, 0x08, 0x1d, 0x1d, 0x04, 0x04, 0x04,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x08,
|
||||
0x08, 0x08, 0x08, 0x06, 0x08, 0x08, 0x00, 0x00,
|
||||
0x08, 0x08, 0x08, 0x00, 0x00, 0x04, 0x04, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 300 - 33F
|
||||
0x00, 0x00, 0x00, 0x01, 0x01, 0x04, 0x04,
|
||||
} // Size: 799 bytes
|
||||
|
||||
var cardinalInclusionMasks = []uint64{ // 100 elements
|
||||
// Entry 0 - 1F
|
||||
0x0000000200500419, 0x0000000000512153, 0x000000000a327105, 0x0000000ca23c7101,
|
||||
0x00000004a23c7201, 0x0000000482943001, 0x0000001482943201, 0x0000000502943001,
|
||||
0x0000000502943001, 0x0000000522943201, 0x0000000540543401, 0x00000000454128e1,
|
||||
0x000000005b02e821, 0x000000006304e821, 0x000000006304ea21, 0x0000000042842821,
|
||||
0x0000000042842a21, 0x0000000042842821, 0x0000000042842821, 0x0000000062842a21,
|
||||
0x0000000200400421, 0x0000000000400061, 0x000000000a004021, 0x0000000022004021,
|
||||
0x0000000022004221, 0x0000000002800021, 0x0000000002800221, 0x0000000002800021,
|
||||
0x0000000002800021, 0x0000000022800221, 0x0000000000400421, 0x0000000000400061,
|
||||
// Entry 20 - 3F
|
||||
0x000000000a004021, 0x0000000022004021, 0x0000000022004221, 0x0000000002800021,
|
||||
0x0000000002800221, 0x0000000002800021, 0x0000000002800021, 0x0000000022800221,
|
||||
0x0000000200400421, 0x0000000000400061, 0x000000000a004021, 0x0000000022004021,
|
||||
0x0000000022004221, 0x0000000002800021, 0x0000000002800221, 0x0000000002800021,
|
||||
0x0000000002800021, 0x0000000022800221, 0x0000000000400421, 0x0000000000400061,
|
||||
0x000000000a004021, 0x0000000022004021, 0x0000000022004221, 0x0000000002800021,
|
||||
0x0000000002800221, 0x0000000002800021, 0x0000000002800021, 0x0000000022800221,
|
||||
0x0000000200400421, 0x0000000000400061, 0x000000000a004021, 0x0000000022004021,
|
||||
// Entry 40 - 5F
|
||||
0x0000000022004221, 0x0000000002800021, 0x0000000002800221, 0x0000000002800021,
|
||||
0x0000000002800021, 0x0000000022800221, 0x0000000040400421, 0x0000000044400061,
|
||||
0x000000005a004021, 0x0000000062004021, 0x0000000062004221, 0x0000000042800021,
|
||||
0x0000000042800221, 0x0000000042800021, 0x0000000042800021, 0x0000000062800221,
|
||||
0x0000000200400421, 0x0000000000400061, 0x000000000a004021, 0x0000000022004021,
|
||||
0x0000000022004221, 0x0000000002800021, 0x0000000002800221, 0x0000000002800021,
|
||||
0x0000000002800021, 0x0000000022800221, 0x0000000040400421, 0x0000000044400061,
|
||||
0x000000005a004021, 0x0000000062004021, 0x0000000062004221, 0x0000000042800021,
|
||||
// Entry 60 - 7F
|
||||
0x0000000042800221, 0x0000000042800021, 0x0000000042800021, 0x0000000062800221,
|
||||
} // Size: 824 bytes
|
||||
|
||||
// Slots used for cardinal: A6 of 0xFF rules; 24 of 0xFF indexes; 37 of 64 sets
|
||||
|
||||
// Total table size 3860 bytes (3KiB); checksum: 4E56F7B1
|
@ -0,0 +1,415 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package catmsg contains support types for package x/text/message/catalog.
|
||||
//
|
||||
// This package contains the low-level implementations of Message used by the
|
||||
// catalog package and provides primitives for other packages to implement their
|
||||
// own. For instance, the plural package provides functionality for selecting
|
||||
// translation strings based on the plural category of substitution arguments.
|
||||
//
|
||||
//
|
||||
// Encoding and Decoding
|
||||
//
|
||||
// Catalogs store Messages encoded as a single string. Compiling a message into
|
||||
// a string both results in a more compact representation and speeds up evaluation.
|
||||
//
|
||||
// A Message must implement a Compile method to convert its arbitrary
|
||||
// representation to a string. The Compile method takes an Encoder which
|
||||
// facilitates serializing the message. Encoders also provide more context of
|
||||
// the message's creation (such as for which language the message is intended),
|
||||
// which may not be known at the time of the creation of the message.
|
||||
//
|
||||
// Each message type must also have an accompanying decoder registered to decode
|
||||
// the message. This decoder takes a Decoder argument which provides the
|
||||
// counterparts for the decoding.
|
||||
//
|
||||
//
|
||||
// Renderers
|
||||
//
|
||||
// A Decoder must be initialized with a Renderer implementation. These
|
||||
// implementations must be provided by packages that use Catalogs, typically
|
||||
// formatting packages such as x/text/message. A typical user will not need to
|
||||
// worry about this type; it is only relevant to packages that do string
|
||||
// formatting and want to use the catalog package to handle localized strings.
|
||||
//
|
||||
// A package that uses catalogs for selecting strings receives selection results
|
||||
// as a sequence of substrings passed to the Renderer. The following snippet shows
|
||||
// how to express the above example using the message package.
|
||||
//
|
||||
// message.Set(language.English, "You are %d minute(s) late.",
|
||||
// catalog.Var("minutes", plural.Select(1, "one", "minute")),
|
||||
// catalog.String("You are %[1]d ${minutes} late."))
|
||||
//
|
||||
// p := message.NewPrinter(language.English)
|
||||
// p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late.
|
||||
//
|
||||
// To evaluate the Printf, package message wraps the arguments in a Renderer
|
||||
// that is passed to the catalog for message decoding. The call sequence that
|
||||
// results from evaluating the above message, assuming the person is rather
|
||||
// tardy, is:
|
||||
//
|
||||
// Render("You are %[1]d ")
|
||||
// Arg(1)
|
||||
// Render("minutes")
|
||||
// Render(" late.")
|
||||
//
|
||||
// The call to Arg is caused by the plural.Select execution, which evaluates
|
||||
// the argument to determine whether the singular or plural message form should
|
||||
// be selected. The calls to Render report the partial results to the message
|
||||
// package for further evaluation.
|
||||
package catmsg
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// A Handle refers to a registered message type.
|
||||
type Handle int
|
||||
|
||||
// A Handler decodes and evaluates data compiled by a Message and sends the
|
||||
// result to the Decoder. The output may depend on the value of the substitution
|
||||
// arguments, accessible by the Decoder's Arg method. The Handler returns false
|
||||
// if there is no translation for the given substitution arguments.
|
||||
type Handler func(d *Decoder) bool
|
||||
|
||||
// Register records the existence of a message type and returns a Handle that
|
||||
// can be used in the Encoder's EncodeMessageType method to create such
|
||||
// messages. The prefix of the name should be the package path followed by
|
||||
// an optional disambiguating string.
|
||||
// Register will panic if a handle for the same name was already registered.
|
||||
func Register(name string, handler Handler) Handle {
|
||||
mutex.Lock()
|
||||
defer mutex.Unlock()
|
||||
|
||||
if _, ok := names[name]; ok {
|
||||
panic(fmt.Errorf("catmsg: handler for %q already exists", name))
|
||||
}
|
||||
h := Handle(len(handlers))
|
||||
names[name] = h
|
||||
handlers = append(handlers, handler)
|
||||
return h
|
||||
}
|
||||
|
||||
// These handlers require fixed positions in the handlers slice.
|
||||
const (
|
||||
msgVars Handle = iota
|
||||
msgFirst
|
||||
msgRaw
|
||||
msgString
|
||||
msgAffix
|
||||
// Leave some arbitrary room for future expansion: 20 should suffice.
|
||||
numInternal = 20
|
||||
)
|
||||
|
||||
const prefix = "golang.org/x/text/internal/catmsg."
|
||||
|
||||
var (
|
||||
// TODO: find a more stable way to link handles to message types.
|
||||
mutex sync.Mutex
|
||||
names = map[string]Handle{
|
||||
prefix + "Vars": msgVars,
|
||||
prefix + "First": msgFirst,
|
||||
prefix + "Raw": msgRaw,
|
||||
prefix + "String": msgString,
|
||||
prefix + "Affix": msgAffix,
|
||||
}
|
||||
handlers = make([]Handler, numInternal)
|
||||
)
|
||||
|
||||
func init() {
|
||||
// This handler is a message type wrapper that initializes a decoder
|
||||
// with a variable block. This message type, if present, is always at the
|
||||
// start of an encoded message.
|
||||
handlers[msgVars] = func(d *Decoder) bool {
|
||||
blockSize := int(d.DecodeUint())
|
||||
d.vars = d.data[:blockSize]
|
||||
d.data = d.data[blockSize:]
|
||||
return d.executeMessage()
|
||||
}
|
||||
|
||||
// First takes the first message in a sequence that results in a match for
|
||||
// the given substitution arguments.
|
||||
handlers[msgFirst] = func(d *Decoder) bool {
|
||||
for !d.Done() {
|
||||
if d.ExecuteMessage() {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
handlers[msgRaw] = func(d *Decoder) bool {
|
||||
d.Render(d.data)
|
||||
return true
|
||||
}
|
||||
|
||||
// A String message alternates between a string constant and a variable
|
||||
// substitution.
|
||||
handlers[msgString] = func(d *Decoder) bool {
|
||||
for !d.Done() {
|
||||
if str := d.DecodeString(); str != "" {
|
||||
d.Render(str)
|
||||
}
|
||||
if d.Done() {
|
||||
break
|
||||
}
|
||||
d.ExecuteSubstitution()
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
handlers[msgAffix] = func(d *Decoder) bool {
|
||||
// TODO: use an alternative method for common cases.
|
||||
prefix := d.DecodeString()
|
||||
suffix := d.DecodeString()
|
||||
if prefix != "" {
|
||||
d.Render(prefix)
|
||||
}
|
||||
ret := d.ExecuteMessage()
|
||||
if suffix != "" {
|
||||
d.Render(suffix)
|
||||
}
|
||||
return ret
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
// ErrIncomplete indicates a compiled message does not define translations
|
||||
// for all possible argument values. If this message is returned, evaluating
|
||||
// a message may result in the ErrNoMatch error.
|
||||
ErrIncomplete = errors.New("catmsg: incomplete message; may not give result for all inputs")
|
||||
|
||||
// ErrNoMatch indicates no translation message matched the given input
|
||||
// parameters when evaluating a message.
|
||||
ErrNoMatch = errors.New("catmsg: no translation for inputs")
|
||||
)
|
||||
|
||||
// A Message holds a collection of translations for the same phrase that may
|
||||
// vary based on the values of substitution arguments.
|
||||
type Message interface {
|
||||
// Compile encodes the format string(s) of the message as a string for later
|
||||
// evaluation.
|
||||
//
|
||||
// The first call Compile makes on the encoder must be EncodeMessageType.
|
||||
// The handle passed to this call may either be a handle returned by
|
||||
// Register to encode a single custom message, or HandleFirst followed by
|
||||
// a sequence of calls to EncodeMessage.
|
||||
//
|
||||
// Compile must return ErrIncomplete if it is possible for evaluation to
|
||||
// not match any translation for a given set of formatting parameters.
|
||||
// For example, selecting a translation based on plural form may not yield
|
||||
// a match if the form "Other" is not one of the selectors.
|
||||
//
|
||||
// Compile may return any other application-specific error. For backwards
|
||||
// compatibility with packages like fmt, which often do not do sanity
|
||||
// checking of format strings ahead of time, Compile should still make an
|
||||
// effort to have some sensible fallback in case of an error.
|
||||
Compile(e *Encoder) error
|
||||
}
|
||||
|
||||
// Compile converts a Message to a data string that can be stored in a Catalog.
|
||||
// The resulting string can subsequently be decoded by passing to the Execute
|
||||
// method of a Decoder.
|
||||
func Compile(tag language.Tag, macros Dictionary, m Message) (data string, err error) {
|
||||
// TODO: pass macros so they can be used for validation.
|
||||
v := &Encoder{inBody: true} // encoder for variables
|
||||
v.root = v
|
||||
e := &Encoder{root: v, parent: v, tag: tag} // encoder for messages
|
||||
err = m.Compile(e)
|
||||
// This package serves te message package, which in turn is meant to be a
|
||||
// drop-in replacement for fmt. With the fmt package, format strings are
|
||||
// evaluated lazily and errors are handled by substituting strings in the
|
||||
// result, rather then returning an error. Dealing with multiple languages
|
||||
// makes it more important to check errors ahead of time. We chose to be
|
||||
// consistent and compatible and allow graceful degradation in case of
|
||||
// errors.
|
||||
buf := e.buf[stripPrefix(e.buf):]
|
||||
if len(v.buf) > 0 {
|
||||
// Prepend variable block.
|
||||
b := make([]byte, 1+maxVarintBytes+len(v.buf)+len(buf))
|
||||
b[0] = byte(msgVars)
|
||||
b = b[:1+encodeUint(b[1:], uint64(len(v.buf)))]
|
||||
b = append(b, v.buf...)
|
||||
b = append(b, buf...)
|
||||
buf = b
|
||||
}
|
||||
if err == nil {
|
||||
err = v.err
|
||||
}
|
||||
return string(buf), err
|
||||
}
|
||||
|
||||
// FirstOf is a message type that prints the first message in the sequence that
|
||||
// resolves to a match for the given substitution arguments.
|
||||
type FirstOf []Message
|
||||
|
||||
// Compile implements Message.
|
||||
func (s FirstOf) Compile(e *Encoder) error {
|
||||
e.EncodeMessageType(msgFirst)
|
||||
err := ErrIncomplete
|
||||
for i, m := range s {
|
||||
if err == nil {
|
||||
return fmt.Errorf("catalog: message argument %d is complete and blocks subsequent messages", i-1)
|
||||
}
|
||||
err = e.EncodeMessage(m)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Var defines a message that can be substituted for a placeholder of the same
|
||||
// name. If an expression does not result in a string after evaluation, Name is
|
||||
// used as the substitution. For example:
|
||||
// Var{
|
||||
// Name: "minutes",
|
||||
// Message: plural.Select(1, "one", "minute"),
|
||||
// }
|
||||
// will resolve to minute for singular and minutes for plural forms.
|
||||
type Var struct {
|
||||
Name string
|
||||
Message Message
|
||||
}
|
||||
|
||||
var errIsVar = errors.New("catmsg: variable used as message")
|
||||
|
||||
// Compile implements Message.
|
||||
//
|
||||
// Note that this method merely registers a variable; it does not create an
|
||||
// encoded message.
|
||||
func (v *Var) Compile(e *Encoder) error {
|
||||
if err := e.addVar(v.Name, v.Message); err != nil {
|
||||
return err
|
||||
}
|
||||
// Using a Var by itself is an error. If it is in a sequence followed by
|
||||
// other messages referring to it, this error will be ignored.
|
||||
return errIsVar
|
||||
}
|
||||
|
||||
// Raw is a message consisting of a single format string that is passed as is
|
||||
// to the Renderer.
|
||||
//
|
||||
// Note that a Renderer may still do its own variable substitution.
|
||||
type Raw string
|
||||
|
||||
// Compile implements Message.
|
||||
func (r Raw) Compile(e *Encoder) (err error) {
|
||||
e.EncodeMessageType(msgRaw)
|
||||
// Special case: raw strings don't have a size encoding and so don't use
|
||||
// EncodeString.
|
||||
e.buf = append(e.buf, r...)
|
||||
return nil
|
||||
}
|
||||
|
||||
// String is a message consisting of a single format string which contains
|
||||
// placeholders that may be substituted with variables.
|
||||
//
|
||||
// Variable substitutions are marked with placeholders and a variable name of
|
||||
// the form ${name}. Any other substitutions such as Go templates or
|
||||
// printf-style substitutions are left to be done by the Renderer.
|
||||
//
|
||||
// When evaluating a string interpolation, a Renderer will receive separate
|
||||
// calls for each placeholder and interstitial string. For example, for the
|
||||
// message: "%[1]v ${invites} %[2]v to ${their} party." The sequence of calls
|
||||
// is:
|
||||
// d.Render("%[1]v ")
|
||||
// d.Arg(1)
|
||||
// d.Render(resultOfInvites)
|
||||
// d.Render(" %[2]v to ")
|
||||
// d.Arg(2)
|
||||
// d.Render(resultOfTheir)
|
||||
// d.Render(" party.")
|
||||
// where the messages for "invites" and "their" both use a plural.Select
|
||||
// referring to the first argument.
|
||||
//
|
||||
// Strings may also invoke macros. Macros are essentially variables that can be
|
||||
// reused. Macros may, for instance, be used to make selections between
|
||||
// different conjugations of a verb. See the catalog package description for an
|
||||
// overview of macros.
|
||||
type String string
|
||||
|
||||
// Compile implements Message. It parses the placeholder formats and returns
|
||||
// any error.
|
||||
func (s String) Compile(e *Encoder) (err error) {
|
||||
msg := string(s)
|
||||
const subStart = "${"
|
||||
hasHeader := false
|
||||
p := 0
|
||||
b := []byte{}
|
||||
for {
|
||||
i := strings.Index(msg[p:], subStart)
|
||||
if i == -1 {
|
||||
break
|
||||
}
|
||||
b = append(b, msg[p:p+i]...)
|
||||
p += i + len(subStart)
|
||||
if i = strings.IndexByte(msg[p:], '}'); i == -1 {
|
||||
b = append(b, "$!(MISSINGBRACE)"...)
|
||||
err = fmt.Errorf("catmsg: missing '}'")
|
||||
p = len(msg)
|
||||
break
|
||||
}
|
||||
name := strings.TrimSpace(msg[p : p+i])
|
||||
if q := strings.IndexByte(name, '('); q == -1 {
|
||||
if !hasHeader {
|
||||
hasHeader = true
|
||||
e.EncodeMessageType(msgString)
|
||||
}
|
||||
e.EncodeString(string(b))
|
||||
e.EncodeSubstitution(name)
|
||||
b = b[:0]
|
||||
} else if j := strings.IndexByte(name[q:], ')'); j == -1 {
|
||||
// TODO: what should the error be?
|
||||
b = append(b, "$!(MISSINGPAREN)"...)
|
||||
err = fmt.Errorf("catmsg: missing ')'")
|
||||
} else if x, sErr := strconv.ParseUint(strings.TrimSpace(name[q+1:q+j]), 10, 32); sErr != nil {
|
||||
// TODO: handle more than one argument
|
||||
b = append(b, "$!(BADNUM)"...)
|
||||
err = fmt.Errorf("catmsg: invalid number %q", strings.TrimSpace(name[q+1:q+j]))
|
||||
} else {
|
||||
if !hasHeader {
|
||||
hasHeader = true
|
||||
e.EncodeMessageType(msgString)
|
||||
}
|
||||
e.EncodeString(string(b))
|
||||
e.EncodeSubstitution(name[:q], int(x))
|
||||
b = b[:0]
|
||||
}
|
||||
p += i + 1
|
||||
}
|
||||
b = append(b, msg[p:]...)
|
||||
if !hasHeader {
|
||||
// Simplify string to a raw string.
|
||||
Raw(string(b)).Compile(e)
|
||||
} else if len(b) > 0 {
|
||||
e.EncodeString(string(b))
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Affix is a message that adds a prefix and suffix to another message.
|
||||
// This is mostly used to add back whitespace to a translation that was stripped
|
||||
// before sending it out.
|
||||
type Affix struct {
|
||||
Message Message
|
||||
Prefix string
|
||||
Suffix string
|
||||
}
|
||||
|
||||
// Compile implements Message.
|
||||
func (a Affix) Compile(e *Encoder) (err error) {
|
||||
// TODO: consider adding a special message type that just adds a single
|
||||
// return. This is probably common enough to handle the majority of cases.
|
||||
// Get some stats first, though.
|
||||
e.EncodeMessageType(msgAffix)
|
||||
e.EncodeString(a.Prefix)
|
||||
e.EncodeString(a.Suffix)
|
||||
e.EncodeMessage(a.Message)
|
||||
return nil
|
||||
}
|
@ -0,0 +1,407 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package catmsg
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// A Renderer renders a Message.
|
||||
type Renderer interface {
|
||||
// Render renders the given string. The given string may be interpreted as a
|
||||
// format string, such as the one used by the fmt package or a template.
|
||||
Render(s string)
|
||||
|
||||
// Arg returns the i-th argument passed to format a message. This method
|
||||
// should return nil if there is no such argument. Messages need access to
|
||||
// arguments to allow selecting a message based on linguistic features of
|
||||
// those arguments.
|
||||
Arg(i int) interface{}
|
||||
}
|
||||
|
||||
// A Dictionary specifies a source of messages, including variables or macros.
|
||||
type Dictionary interface {
|
||||
// Lookup returns the message for the given key. It returns false for ok if
|
||||
// such a message could not be found.
|
||||
Lookup(key string) (data string, ok bool)
|
||||
|
||||
// TODO: consider returning an interface, instead of a string. This will
|
||||
// allow implementations to do their own message type decoding.
|
||||
}
|
||||
|
||||
// An Encoder serializes a Message to a string.
|
||||
type Encoder struct {
|
||||
// The root encoder is used for storing encoded variables.
|
||||
root *Encoder
|
||||
// The parent encoder provides the surrounding scopes for resolving variable
|
||||
// names.
|
||||
parent *Encoder
|
||||
|
||||
tag language.Tag
|
||||
|
||||
// buf holds the encoded message so far. After a message completes encoding,
|
||||
// the contents of buf, prefixed by the encoded length, are flushed to the
|
||||
// parent buffer.
|
||||
buf []byte
|
||||
|
||||
// vars is the lookup table of variables in the current scope.
|
||||
vars []keyVal
|
||||
|
||||
err error
|
||||
inBody bool // if false next call must be EncodeMessageType
|
||||
}
|
||||
|
||||
type keyVal struct {
|
||||
key string
|
||||
offset int
|
||||
}
|
||||
|
||||
// Language reports the language for which the encoded message will be stored
|
||||
// in the Catalog.
|
||||
func (e *Encoder) Language() language.Tag { return e.tag }
|
||||
|
||||
func (e *Encoder) setError(err error) {
|
||||
if e.root.err == nil {
|
||||
e.root.err = err
|
||||
}
|
||||
}
|
||||
|
||||
// EncodeUint encodes x.
|
||||
func (e *Encoder) EncodeUint(x uint64) {
|
||||
e.checkInBody()
|
||||
var buf [maxVarintBytes]byte
|
||||
n := encodeUint(buf[:], x)
|
||||
e.buf = append(e.buf, buf[:n]...)
|
||||
}
|
||||
|
||||
// EncodeString encodes s.
|
||||
func (e *Encoder) EncodeString(s string) {
|
||||
e.checkInBody()
|
||||
e.EncodeUint(uint64(len(s)))
|
||||
e.buf = append(e.buf, s...)
|
||||
}
|
||||
|
||||
// EncodeMessageType marks the current message to be of type h.
|
||||
//
|
||||
// It must be the first call of a Message's Compile method.
|
||||
func (e *Encoder) EncodeMessageType(h Handle) {
|
||||
if e.inBody {
|
||||
panic("catmsg: EncodeMessageType not the first method called")
|
||||
}
|
||||
e.inBody = true
|
||||
e.EncodeUint(uint64(h))
|
||||
}
|
||||
|
||||
// EncodeMessage serializes the given message inline at the current position.
|
||||
func (e *Encoder) EncodeMessage(m Message) error {
|
||||
e = &Encoder{root: e.root, parent: e, tag: e.tag}
|
||||
err := m.Compile(e)
|
||||
if _, ok := m.(*Var); !ok {
|
||||
e.flushTo(e.parent)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *Encoder) checkInBody() {
|
||||
if !e.inBody {
|
||||
panic("catmsg: expected prior call to EncodeMessageType")
|
||||
}
|
||||
}
|
||||
|
||||
// stripPrefix indicates the number of prefix bytes that must be stripped to
|
||||
// turn a single-element sequence into a message that is just this single member
|
||||
// without its size prefix. If the message can be stripped, b[1:n] contains the
|
||||
// size prefix.
|
||||
func stripPrefix(b []byte) (n int) {
|
||||
if len(b) > 0 && Handle(b[0]) == msgFirst {
|
||||
x, n, _ := decodeUint(b[1:])
|
||||
if 1+n+int(x) == len(b) {
|
||||
return 1 + n
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (e *Encoder) flushTo(dst *Encoder) {
|
||||
data := e.buf
|
||||
p := stripPrefix(data)
|
||||
if p > 0 {
|
||||
data = data[1:]
|
||||
} else {
|
||||
// Prefix the size.
|
||||
dst.EncodeUint(uint64(len(data)))
|
||||
}
|
||||
dst.buf = append(dst.buf, data...)
|
||||
}
|
||||
|
||||
func (e *Encoder) addVar(key string, m Message) error {
|
||||
for _, v := range e.parent.vars {
|
||||
if v.key == key {
|
||||
err := fmt.Errorf("catmsg: duplicate variable %q", key)
|
||||
e.setError(err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
scope := e.parent
|
||||
// If a variable message is Incomplete, and does not evaluate to a message
|
||||
// during execution, we fall back to the variable name. We encode this by
|
||||
// appending the variable name if the message reports it's incomplete.
|
||||
|
||||
err := m.Compile(e)
|
||||
if err != ErrIncomplete {
|
||||
e.setError(err)
|
||||
}
|
||||
switch {
|
||||
case len(e.buf) == 1 && Handle(e.buf[0]) == msgFirst: // empty sequence
|
||||
e.buf = e.buf[:0]
|
||||
e.inBody = false
|
||||
fallthrough
|
||||
case len(e.buf) == 0:
|
||||
// Empty message.
|
||||
if err := String(key).Compile(e); err != nil {
|
||||
e.setError(err)
|
||||
}
|
||||
case err == ErrIncomplete:
|
||||
if Handle(e.buf[0]) != msgFirst {
|
||||
seq := &Encoder{root: e.root, parent: e}
|
||||
seq.EncodeMessageType(msgFirst)
|
||||
e.flushTo(seq)
|
||||
e = seq
|
||||
}
|
||||
// e contains a sequence; append the fallback string.
|
||||
e.EncodeMessage(String(key))
|
||||
}
|
||||
|
||||
// Flush result to variable heap.
|
||||
offset := len(e.root.buf)
|
||||
e.flushTo(e.root)
|
||||
e.buf = e.buf[:0]
|
||||
|
||||
// Record variable offset in current scope.
|
||||
scope.vars = append(scope.vars, keyVal{key: key, offset: offset})
|
||||
return err
|
||||
}
|
||||
|
||||
const (
|
||||
substituteVar = iota
|
||||
substituteMacro
|
||||
substituteError
|
||||
)
|
||||
|
||||
// EncodeSubstitution inserts a resolved reference to a variable or macro.
|
||||
//
|
||||
// This call must be matched with a call to ExecuteSubstitution at decoding
|
||||
// time.
|
||||
func (e *Encoder) EncodeSubstitution(name string, arguments ...int) {
|
||||
if arity := len(arguments); arity > 0 {
|
||||
// TODO: also resolve macros.
|
||||
e.EncodeUint(substituteMacro)
|
||||
e.EncodeString(name)
|
||||
for _, a := range arguments {
|
||||
e.EncodeUint(uint64(a))
|
||||
}
|
||||
return
|
||||
}
|
||||
for scope := e; scope != nil; scope = scope.parent {
|
||||
for _, v := range scope.vars {
|
||||
if v.key != name {
|
||||
continue
|
||||
}
|
||||
e.EncodeUint(substituteVar) // TODO: support arity > 0
|
||||
e.EncodeUint(uint64(v.offset))
|
||||
return
|
||||
}
|
||||
}
|
||||
// TODO: refer to dictionary-wide scoped variables.
|
||||
e.EncodeUint(substituteError)
|
||||
e.EncodeString(name)
|
||||
e.setError(fmt.Errorf("catmsg: unknown var %q", name))
|
||||
}
|
||||
|
||||
// A Decoder deserializes and evaluates messages that are encoded by an encoder.
|
||||
type Decoder struct {
|
||||
tag language.Tag
|
||||
dst Renderer
|
||||
macros Dictionary
|
||||
|
||||
err error
|
||||
vars string
|
||||
data string
|
||||
|
||||
macroArg int // TODO: allow more than one argument
|
||||
}
|
||||
|
||||
// NewDecoder returns a new Decoder.
|
||||
//
|
||||
// Decoders are designed to be reused for multiple invocations of Execute.
|
||||
// Only one goroutine may call Execute concurrently.
|
||||
func NewDecoder(tag language.Tag, r Renderer, macros Dictionary) *Decoder {
|
||||
return &Decoder{
|
||||
tag: tag,
|
||||
dst: r,
|
||||
macros: macros,
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Decoder) setError(err error) {
|
||||
if d.err == nil {
|
||||
d.err = err
|
||||
}
|
||||
}
|
||||
|
||||
// Language returns the language in which the message is being rendered.
|
||||
//
|
||||
// The destination language may be a child language of the language used for
|
||||
// encoding. For instance, a decoding language of "pt-PT"" is consistent with an
|
||||
// encoding language of "pt".
|
||||
func (d *Decoder) Language() language.Tag { return d.tag }
|
||||
|
||||
// Done reports whether there are more bytes to process in this message.
|
||||
func (d *Decoder) Done() bool { return len(d.data) == 0 }
|
||||
|
||||
// Render implements Renderer.
|
||||
func (d *Decoder) Render(s string) { d.dst.Render(s) }
|
||||
|
||||
// Arg implements Renderer.
|
||||
//
|
||||
// During evaluation of macros, the argument positions may be mapped to
|
||||
// arguments that differ from the original call.
|
||||
func (d *Decoder) Arg(i int) interface{} {
|
||||
if d.macroArg != 0 {
|
||||
if i != 1 {
|
||||
panic("catmsg: only macros with single argument supported")
|
||||
}
|
||||
i = d.macroArg
|
||||
}
|
||||
return d.dst.Arg(i)
|
||||
}
|
||||
|
||||
// DecodeUint decodes a number that was encoded with EncodeUint and advances the
|
||||
// position.
|
||||
func (d *Decoder) DecodeUint() uint64 {
|
||||
x, n, err := decodeUintString(d.data)
|
||||
d.data = d.data[n:]
|
||||
if err != nil {
|
||||
d.setError(err)
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
// DecodeString decodes a string that was encoded with EncodeString and advances
|
||||
// the position.
|
||||
func (d *Decoder) DecodeString() string {
|
||||
size := d.DecodeUint()
|
||||
s := d.data[:size]
|
||||
d.data = d.data[size:]
|
||||
return s
|
||||
}
|
||||
|
||||
// SkipMessage skips the message at the current location and advances the
|
||||
// position.
|
||||
func (d *Decoder) SkipMessage() {
|
||||
n := int(d.DecodeUint())
|
||||
d.data = d.data[n:]
|
||||
}
|
||||
|
||||
// Execute decodes and evaluates msg.
|
||||
//
|
||||
// Only one goroutine may call execute.
|
||||
func (d *Decoder) Execute(msg string) error {
|
||||
d.err = nil
|
||||
if !d.execute(msg) {
|
||||
return ErrNoMatch
|
||||
}
|
||||
return d.err
|
||||
}
|
||||
|
||||
func (d *Decoder) execute(msg string) bool {
|
||||
saved := d.data
|
||||
d.data = msg
|
||||
ok := d.executeMessage()
|
||||
d.data = saved
|
||||
return ok
|
||||
}
|
||||
|
||||
// executeMessageFromData is like execute, but also decodes a leading message
|
||||
// size and clips the given string accordingly.
|
||||
//
|
||||
// It reports the number of bytes consumed and whether a message was selected.
|
||||
func (d *Decoder) executeMessageFromData(s string) (n int, ok bool) {
|
||||
saved := d.data
|
||||
d.data = s
|
||||
size := int(d.DecodeUint())
|
||||
n = len(s) - len(d.data)
|
||||
// Sanitize the setting. This allows skipping a size argument for
|
||||
// RawString and method Done.
|
||||
d.data = d.data[:size]
|
||||
ok = d.executeMessage()
|
||||
n += size - len(d.data)
|
||||
d.data = saved
|
||||
return n, ok
|
||||
}
|
||||
|
||||
var errUnknownHandler = errors.New("catmsg: string contains unsupported handler")
|
||||
|
||||
// executeMessage reads the handle id, initializes the decoder and executes the
|
||||
// message. It is assumed that all of d.data[d.p:] is the single message.
|
||||
func (d *Decoder) executeMessage() bool {
|
||||
if d.Done() {
|
||||
// We interpret no data as a valid empty message.
|
||||
return true
|
||||
}
|
||||
handle := d.DecodeUint()
|
||||
|
||||
var fn Handler
|
||||
mutex.Lock()
|
||||
if int(handle) < len(handlers) {
|
||||
fn = handlers[handle]
|
||||
}
|
||||
mutex.Unlock()
|
||||
if fn == nil {
|
||||
d.setError(errUnknownHandler)
|
||||
d.execute(fmt.Sprintf("\x02$!(UNKNOWNMSGHANDLER=%#x)", handle))
|
||||
return true
|
||||
}
|
||||
return fn(d)
|
||||
}
|
||||
|
||||
// ExecuteMessage decodes and executes the message at the current position.
|
||||
func (d *Decoder) ExecuteMessage() bool {
|
||||
n, ok := d.executeMessageFromData(d.data)
|
||||
d.data = d.data[n:]
|
||||
return ok
|
||||
}
|
||||
|
||||
// ExecuteSubstitution executes the message corresponding to the substitution
|
||||
// as encoded by EncodeSubstitution.
|
||||
func (d *Decoder) ExecuteSubstitution() {
|
||||
switch x := d.DecodeUint(); x {
|
||||
case substituteVar:
|
||||
offset := d.DecodeUint()
|
||||
d.executeMessageFromData(d.vars[offset:])
|
||||
case substituteMacro:
|
||||
name := d.DecodeString()
|
||||
data, ok := d.macros.Lookup(name)
|
||||
old := d.macroArg
|
||||
// TODO: support macros of arity other than 1.
|
||||
d.macroArg = int(d.DecodeUint())
|
||||
switch {
|
||||
case !ok:
|
||||
// TODO: detect this at creation time.
|
||||
d.setError(fmt.Errorf("catmsg: undefined macro %q", name))
|
||||
fallthrough
|
||||
case !d.execute(data):
|
||||
d.dst.Render(name) // fall back to macro name.
|
||||
}
|
||||
d.macroArg = old
|
||||
case substituteError:
|
||||
d.dst.Render(d.DecodeString())
|
||||
default:
|
||||
panic("catmsg: unreachable")
|
||||
}
|
||||
}
|
@ -0,0 +1,62 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package catmsg
|
||||
|
||||
// This file implements varint encoding analogous to the one in encoding/binary.
|
||||
// We need a string version of this function, so we add that here and then add
|
||||
// the rest for consistency.
|
||||
|
||||
import "errors"
|
||||
|
||||
var (
|
||||
errIllegalVarint = errors.New("catmsg: illegal varint")
|
||||
errVarintTooLarge = errors.New("catmsg: varint too large for uint64")
|
||||
)
|
||||
|
||||
const maxVarintBytes = 10 // maximum length of a varint
|
||||
|
||||
// encodeUint writes x into buf as a variable-sized integer, seven bits per
// byte starting with the least significant, with the high bit of a byte set
// when more bytes follow. It returns the number of bytes written. buf must be
// at least maxVarintBytes long.
func encodeUint(buf []byte, x uint64) (n int) {
	for x >= 0x80 {
		buf[n] = uint8(x) | 0x80
		x >>= 7
		n++
	}
	buf[n] = uint8(x)
	return n + 1
}
|
||||
|
||||
func decodeUintString(s string) (x uint64, size int, err error) {
|
||||
i := 0
|
||||
for shift := uint(0); shift < 64; shift += 7 {
|
||||
if i >= len(s) {
|
||||
return 0, i, errIllegalVarint
|
||||
}
|
||||
b := uint64(s[i])
|
||||
i++
|
||||
x |= (b & 0x7F) << shift
|
||||
if b&0x80 == 0 {
|
||||
return x, i, nil
|
||||
}
|
||||
}
|
||||
return 0, i, errVarintTooLarge
|
||||
}
|
||||
|
||||
func decodeUint(b []byte) (x uint64, size int, err error) {
|
||||
i := 0
|
||||
for shift := uint(0); shift < 64; shift += 7 {
|
||||
if i >= len(b) {
|
||||
return 0, i, errIllegalVarint
|
||||
}
|
||||
c := uint64(b[i])
|
||||
i++
|
||||
x |= (c & 0x7F) << shift
|
||||
if c&0x80 == 0 {
|
||||
return x, i, nil
|
||||
}
|
||||
}
|
||||
return 0, i, errVarintTooLarge
|
||||
}
|
@ -0,0 +1,41 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package format contains types for defining language-specific formatting of
|
||||
// values.
|
||||
//
|
||||
// This package is internal now, but will eventually be exposed after the API
|
||||
// settles.
|
||||
package format // import "golang.org/x/text/internal/format"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// State represents the printer state passed to custom formatters. It provides
|
||||
// access to the fmt.State interface and the sentence and language-related
|
||||
// context.
|
||||
type State interface {
|
||||
fmt.State
|
||||
|
||||
// Language reports the requested language in which to render a message.
|
||||
Language() language.Tag
|
||||
|
||||
// TODO: consider this and removing rune from the Format method in the
|
||||
// Formatter interface.
|
||||
//
|
||||
// Verb returns the format variant to render, analogous to the types used
|
||||
// in fmt. Use 'v' for the default or only variant.
|
||||
// Verb() rune
|
||||
|
||||
// TODO: more info:
|
||||
// - sentence context such as linguistic features passed by the translator.
|
||||
}
|
||||
|
||||
// Formatter is analogous to fmt.Formatter.
|
||||
type Formatter interface {
|
||||
Format(state State, verb rune)
|
||||
}
|
@ -0,0 +1,358 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package format
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// A Parser parses a format string. The result from the parse are set in the
|
||||
// struct fields.
|
||||
type Parser struct {
|
||||
Verb rune
|
||||
|
||||
WidthPresent bool
|
||||
PrecPresent bool
|
||||
Minus bool
|
||||
Plus bool
|
||||
Sharp bool
|
||||
Space bool
|
||||
Zero bool
|
||||
|
||||
// For the formats %+v %#v, we set the plusV/sharpV flags
|
||||
// and clear the plus/sharp flags since %+v and %#v are in effect
|
||||
// different, flagless formats set at the top level.
|
||||
PlusV bool
|
||||
SharpV bool
|
||||
|
||||
HasIndex bool
|
||||
|
||||
Width int
|
||||
Prec int // precision
|
||||
|
||||
// retain arguments across calls.
|
||||
Args []interface{}
|
||||
// retain current argument number across calls
|
||||
ArgNum int
|
||||
|
||||
// reordered records whether the format string used argument reordering.
|
||||
Reordered bool
|
||||
// goodArgNum records whether the most recent reordering directive was valid.
|
||||
goodArgNum bool
|
||||
|
||||
// position info
|
||||
format string
|
||||
startPos int
|
||||
endPos int
|
||||
Status Status
|
||||
}
|
||||
|
||||
// Reset initializes a parser to scan format strings for the given args.
|
||||
func (p *Parser) Reset(args []interface{}) {
|
||||
p.Args = args
|
||||
p.ArgNum = 0
|
||||
p.startPos = 0
|
||||
p.Reordered = false
|
||||
}
|
||||
|
||||
// Text returns the part of the format string that was parsed by the last call
|
||||
// to Scan. It returns the original substitution clause if the current scan
|
||||
// parsed a substitution.
|
||||
func (p *Parser) Text() string { return p.format[p.startPos:p.endPos] }
|
||||
|
||||
// SetFormat sets a new format string to parse. It does not reset the argument
|
||||
// count.
|
||||
func (p *Parser) SetFormat(format string) {
|
||||
p.format = format
|
||||
p.startPos = 0
|
||||
p.endPos = 0
|
||||
}
|
||||
|
||||
// Status indicates the result type of a call to Scan.
type Status int

const (
	// StatusText: literal text was scanned (also used for "%%").
	StatusText Status = iota
	// StatusSubstitution: a substitution was scanned.
	StatusSubstitution
	// StatusBadWidthSubstitution: a '*' width argument was not a usable integer.
	StatusBadWidthSubstitution
	// StatusBadPrecSubstitution: a '*' precision argument was not a usable integer.
	StatusBadPrecSubstitution
	// StatusNoVerb: the format string ended before a verb was found.
	StatusNoVerb
	// StatusBadArgNum: an explicit argument index was invalid or misplaced.
	StatusBadArgNum
	// StatusMissingArg: no argument is left for the verb.
	StatusMissingArg
)
|
||||
|
||||
// ClearFlags reset the parser to default behavior.
|
||||
func (p *Parser) ClearFlags() {
|
||||
p.WidthPresent = false
|
||||
p.PrecPresent = false
|
||||
p.Minus = false
|
||||
p.Plus = false
|
||||
p.Sharp = false
|
||||
p.Space = false
|
||||
p.Zero = false
|
||||
|
||||
p.PlusV = false
|
||||
p.SharpV = false
|
||||
|
||||
p.HasIndex = false
|
||||
}
|
||||
|
||||
// Scan scans the next part of the format string and sets the status to
|
||||
// indicate whether it scanned a string literal, substitution or error.
|
||||
func (p *Parser) Scan() bool {
|
||||
p.Status = StatusText
|
||||
format := p.format
|
||||
end := len(format)
|
||||
if p.endPos >= end {
|
||||
return false
|
||||
}
|
||||
afterIndex := false // previous item in format was an index like [3].
|
||||
|
||||
p.startPos = p.endPos
|
||||
p.goodArgNum = true
|
||||
i := p.startPos
|
||||
for i < end && format[i] != '%' {
|
||||
i++
|
||||
}
|
||||
if i > p.startPos {
|
||||
p.endPos = i
|
||||
return true
|
||||
}
|
||||
// Process one verb
|
||||
i++
|
||||
|
||||
p.Status = StatusSubstitution
|
||||
|
||||
// Do we have flags?
|
||||
p.ClearFlags()
|
||||
|
||||
simpleFormat:
|
||||
for ; i < end; i++ {
|
||||
c := p.format[i]
|
||||
switch c {
|
||||
case '#':
|
||||
p.Sharp = true
|
||||
case '0':
|
||||
p.Zero = !p.Minus // Only allow zero padding to the left.
|
||||
case '+':
|
||||
p.Plus = true
|
||||
case '-':
|
||||
p.Minus = true
|
||||
p.Zero = false // Do not pad with zeros to the right.
|
||||
case ' ':
|
||||
p.Space = true
|
||||
default:
|
||||
// Fast path for common case of ascii lower case simple verbs
|
||||
// without precision or width or argument indices.
|
||||
if 'a' <= c && c <= 'z' && p.ArgNum < len(p.Args) {
|
||||
if c == 'v' {
|
||||
// Go syntax
|
||||
p.SharpV = p.Sharp
|
||||
p.Sharp = false
|
||||
// Struct-field syntax
|
||||
p.PlusV = p.Plus
|
||||
p.Plus = false
|
||||
}
|
||||
p.Verb = rune(c)
|
||||
p.ArgNum++
|
||||
p.endPos = i + 1
|
||||
return true
|
||||
}
|
||||
// Format is more complex than simple flags and a verb or is malformed.
|
||||
break simpleFormat
|
||||
}
|
||||
}
|
||||
|
||||
// Do we have an explicit argument index?
|
||||
i, afterIndex = p.updateArgNumber(format, i)
|
||||
|
||||
// Do we have width?
|
||||
if i < end && format[i] == '*' {
|
||||
i++
|
||||
p.Width, p.WidthPresent = p.intFromArg()
|
||||
|
||||
if !p.WidthPresent {
|
||||
p.Status = StatusBadWidthSubstitution
|
||||
}
|
||||
|
||||
// We have a negative width, so take its value and ensure
|
||||
// that the minus flag is set
|
||||
if p.Width < 0 {
|
||||
p.Width = -p.Width
|
||||
p.Minus = true
|
||||
p.Zero = false // Do not pad with zeros to the right.
|
||||
}
|
||||
afterIndex = false
|
||||
} else {
|
||||
p.Width, p.WidthPresent, i = parsenum(format, i, end)
|
||||
if afterIndex && p.WidthPresent { // "%[3]2d"
|
||||
p.goodArgNum = false
|
||||
}
|
||||
}
|
||||
|
||||
// Do we have precision?
|
||||
if i+1 < end && format[i] == '.' {
|
||||
i++
|
||||
if afterIndex { // "%[3].2d"
|
||||
p.goodArgNum = false
|
||||
}
|
||||
i, afterIndex = p.updateArgNumber(format, i)
|
||||
if i < end && format[i] == '*' {
|
||||
i++
|
||||
p.Prec, p.PrecPresent = p.intFromArg()
|
||||
// Negative precision arguments don't make sense
|
||||
if p.Prec < 0 {
|
||||
p.Prec = 0
|
||||
p.PrecPresent = false
|
||||
}
|
||||
if !p.PrecPresent {
|
||||
p.Status = StatusBadPrecSubstitution
|
||||
}
|
||||
afterIndex = false
|
||||
} else {
|
||||
p.Prec, p.PrecPresent, i = parsenum(format, i, end)
|
||||
if !p.PrecPresent {
|
||||
p.Prec = 0
|
||||
p.PrecPresent = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !afterIndex {
|
||||
i, afterIndex = p.updateArgNumber(format, i)
|
||||
}
|
||||
p.HasIndex = afterIndex
|
||||
|
||||
if i >= end {
|
||||
p.endPos = i
|
||||
p.Status = StatusNoVerb
|
||||
return true
|
||||
}
|
||||
|
||||
verb, w := utf8.DecodeRuneInString(format[i:])
|
||||
p.endPos = i + w
|
||||
p.Verb = verb
|
||||
|
||||
switch {
|
||||
case verb == '%': // Percent does not absorb operands and ignores f.wid and f.prec.
|
||||
p.startPos = p.endPos - 1
|
||||
p.Status = StatusText
|
||||
case !p.goodArgNum:
|
||||
p.Status = StatusBadArgNum
|
||||
case p.ArgNum >= len(p.Args): // No argument left over to print for the current verb.
|
||||
p.Status = StatusMissingArg
|
||||
p.ArgNum++
|
||||
case verb == 'v':
|
||||
// Go syntax
|
||||
p.SharpV = p.Sharp
|
||||
p.Sharp = false
|
||||
// Struct-field syntax
|
||||
p.PlusV = p.Plus
|
||||
p.Plus = false
|
||||
fallthrough
|
||||
default:
|
||||
p.ArgNum++
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// intFromArg gets the ArgNumth element of Args. On return, isInt reports
|
||||
// whether the argument has integer type.
|
||||
func (p *Parser) intFromArg() (num int, isInt bool) {
|
||||
if p.ArgNum < len(p.Args) {
|
||||
arg := p.Args[p.ArgNum]
|
||||
num, isInt = arg.(int) // Almost always OK.
|
||||
if !isInt {
|
||||
// Work harder.
|
||||
switch v := reflect.ValueOf(arg); v.Kind() {
|
||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
||||
n := v.Int()
|
||||
if int64(int(n)) == n {
|
||||
num = int(n)
|
||||
isInt = true
|
||||
}
|
||||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
|
||||
n := v.Uint()
|
||||
if int64(n) >= 0 && uint64(int(n)) == n {
|
||||
num = int(n)
|
||||
isInt = true
|
||||
}
|
||||
default:
|
||||
// Already 0, false.
|
||||
}
|
||||
}
|
||||
p.ArgNum++
|
||||
if tooLarge(num) {
|
||||
num = 0
|
||||
isInt = false
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// parseArgNumber returns the value of the bracketed number, minus 1
|
||||
// (explicit argument numbers are one-indexed but we want zero-indexed).
|
||||
// The opening bracket is known to be present at format[0].
|
||||
// The returned values are the index, the number of bytes to consume
|
||||
// up to the closing paren, if present, and whether the number parsed
|
||||
// ok. The bytes to consume will be 1 if no closing paren is present.
|
||||
func parseArgNumber(format string) (index int, wid int, ok bool) {
|
||||
// There must be at least 3 bytes: [n].
|
||||
if len(format) < 3 {
|
||||
return 0, 1, false
|
||||
}
|
||||
|
||||
// Find closing bracket.
|
||||
for i := 1; i < len(format); i++ {
|
||||
if format[i] == ']' {
|
||||
width, ok, newi := parsenum(format, 1, i)
|
||||
if !ok || newi != i {
|
||||
return 0, i + 1, false
|
||||
}
|
||||
return width - 1, i + 1, true // arg numbers are one-indexed and skip paren.
|
||||
}
|
||||
}
|
||||
return 0, 1, false
|
||||
}
|
||||
|
||||
// updateArgNumber returns the next argument to evaluate, which is either the value of the passed-in
|
||||
// argNum or the value of the bracketed integer that begins format[i:]. It also returns
|
||||
// the new value of i, that is, the index of the next byte of the format to process.
|
||||
func (p *Parser) updateArgNumber(format string, i int) (newi int, found bool) {
|
||||
if len(format) <= i || format[i] != '[' {
|
||||
return i, false
|
||||
}
|
||||
p.Reordered = true
|
||||
index, wid, ok := parseArgNumber(format[i:])
|
||||
if ok && 0 <= index && index < len(p.Args) {
|
||||
p.ArgNum = index
|
||||
return i + wid, true
|
||||
}
|
||||
p.goodArgNum = false
|
||||
return i + wid, ok
|
||||
}
|
||||
|
||||
// tooLarge reports whether the magnitude of the integer is
// too large to be used as a formatting width or precision, that is,
// whether x lies outside [-1e6, 1e6].
func tooLarge(x int) bool {
	const limit int = 1e6
	return x < -limit || limit < x
}
|
||||
|
||||
// parsenum converts ASCII to integer. num is 0 (and isnum is false) if no number present.
|
||||
func parsenum(s string, start, end int) (num int, isnum bool, newi int) {
|
||||
if start >= end {
|
||||
return 0, false, end
|
||||
}
|
||||
for newi = start; newi < end && '0' <= s[newi] && s[newi] <= '9'; newi++ {
|
||||
if tooLarge(num) {
|
||||
return 0, false, end // Overflow; crazy long number most likely.
|
||||
}
|
||||
num = num*10 + int(s[newi]-'0')
|
||||
isnum = true
|
||||
}
|
||||
return
|
||||
}
|
@ -0,0 +1,49 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package internal contains non-exported functionality that are used by
|
||||
// packages in the text repository.
|
||||
package internal // import "golang.org/x/text/internal"
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// SortTags sorts tags in place.
|
||||
func SortTags(tags []language.Tag) {
|
||||
sort.Sort(sorter(tags))
|
||||
}
|
||||
|
||||
type sorter []language.Tag
|
||||
|
||||
func (s sorter) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s sorter) Swap(i, j int) {
|
||||
s[i], s[j] = s[j], s[i]
|
||||
}
|
||||
|
||||
func (s sorter) Less(i, j int) bool {
|
||||
return s[i].String() < s[j].String()
|
||||
}
|
||||
|
||||
// UniqueTags sorts and filters duplicate tags in place and returns a slice with
|
||||
// only unique tags.
|
||||
func UniqueTags(tags []language.Tag) []language.Tag {
|
||||
if len(tags) <= 1 {
|
||||
return tags
|
||||
}
|
||||
SortTags(tags)
|
||||
k := 0
|
||||
for i := 1; i < len(tags); i++ {
|
||||
if tags[k].String() < tags[i].String() {
|
||||
k++
|
||||
tags[k] = tags[i]
|
||||
}
|
||||
}
|
||||
return tags[:k+1]
|
||||
}
|
@ -0,0 +1,16 @@
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package language
|
||||
|
||||
// This file contains code common to the maketables.go and the package code.
|
||||
|
||||
// AliasType is the type of an alias in AliasMap.
type AliasType int8

// The known alias types are numbered from 0; AliasTypeUnknown is -1.
const (
	Deprecated AliasType = iota
	Macro
	Legacy

	AliasTypeUnknown AliasType = -1
)
|
@ -0,0 +1,29 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
// CompactCoreInfo is a compact integer with the three core tags encoded:
// GetCompactCore stores the language ID from bit 20 up, the script ID from
// bit 12 up and the region ID in the low bits.
type CompactCoreInfo uint32
|
||||
|
||||
// GetCompactCore generates a uint32 value that is guaranteed to be unique for
|
||||
// different language, region, and script values.
|
||||
func GetCompactCore(t Tag) (cci CompactCoreInfo, ok bool) {
|
||||
if t.LangID > langNoIndexOffset {
|
||||
return 0, false
|
||||
}
|
||||
cci |= CompactCoreInfo(t.LangID) << (8 + 12)
|
||||
cci |= CompactCoreInfo(t.ScriptID) << 12
|
||||
cci |= CompactCoreInfo(t.RegionID)
|
||||
return cci, true
|
||||
}
|
||||
|
||||
// Tag generates a tag from c.
|
||||
func (c CompactCoreInfo) Tag() Tag {
|
||||
return Tag{
|
||||
LangID: Language(c >> 20),
|
||||
RegionID: Region(c & 0x3ff),
|
||||
ScriptID: Script(c>>12) & 0xff,
|
||||
}
|
||||
}
|
@ -0,0 +1,61 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package compact defines a compact representation of language tags.
|
||||
//
|
||||
// Common language tags (at least all for which locale information is defined
|
||||
// in CLDR) are assigned a unique index. Each Tag is associated with such an
|
||||
// ID for selecting language-related resources (such as translations) as well
|
||||
// as one for selecting regional defaults (currency, number formatting, etc.)
|
||||
//
|
||||
// We may want to export this functionality at some point, but at this point
|
||||
// this is only available for use within x/text.
|
||||
package compact // import "golang.org/x/text/internal/language/compact"
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// ID is an integer identifying a single tag. IDs below len(coreTags) index
// coreTags; larger IDs index specialTags (see ID.Tag).
type ID uint16
|
||||
|
||||
func getCoreIndex(t language.Tag) (id ID, ok bool) {
|
||||
cci, ok := language.GetCompactCore(t)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
i := sort.Search(len(coreTags), func(i int) bool {
|
||||
return cci <= coreTags[i]
|
||||
})
|
||||
if i == len(coreTags) || coreTags[i] != cci {
|
||||
return 0, false
|
||||
}
|
||||
return ID(i), true
|
||||
}
|
||||
|
||||
// Parent returns the ID of the parent or the root ID if id is already the root.
|
||||
func (id ID) Parent() ID {
|
||||
return parents[id]
|
||||
}
|
||||
|
||||
// Tag converts id to an internal language Tag.
|
||||
func (id ID) Tag() language.Tag {
|
||||
if int(id) >= len(coreTags) {
|
||||
return specialTags[int(id)-len(coreTags)]
|
||||
}
|
||||
return coreTags[id].Tag()
|
||||
}
|
||||
|
||||
var specialTags []language.Tag
|
||||
|
||||
func init() {
|
||||
tags := strings.Split(specialTagsStr, " ")
|
||||
specialTags = make([]language.Tag, len(tags))
|
||||
for i, t := range tags {
|
||||
specialTags[i] = language.MustParse(t)
|
||||
}
|
||||
}
|
@ -0,0 +1,64 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Language tag table generator.
|
||||
// Data read from the web.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
// Command-line flags of the table generator.
var (
	// test is the -test flag.
	test = flag.Bool(
		"test",
		false,
		"test existing tables; can be used to compare web data with package data.",
	)
	// outputFile is the -output flag.
	outputFile = flag.String(
		"output",
		"tables.go",
		"output file for generated tables",
	)
)
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "compact")
|
||||
|
||||
fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`)
|
||||
|
||||
b := newBuilder(w)
|
||||
gen.WriteCLDRVersion(w)
|
||||
|
||||
b.writeCompactIndex()
|
||||
}
|
||||
|
||||
type builder struct {
|
||||
w *gen.CodeWriter
|
||||
data *cldr.CLDR
|
||||
supp *cldr.SupplementalData
|
||||
}
|
||||
|
||||
func newBuilder(w *gen.CodeWriter) *builder {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
b := builder{
|
||||
w: w,
|
||||
data: data,
|
||||
supp: data.Supplemental(),
|
||||
}
|
||||
return &b
|
||||
}
|
@ -0,0 +1,113 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This file generates derivative tables based on the language package itself.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// Compact indices:
|
||||
// Note -va-X variants only apply to localization variants.
|
||||
// BCP variants only ever apply to language.
|
||||
// The only ambiguity between tags is with regions.
|
||||
|
||||
func (b *builder) writeCompactIndex() {
|
||||
// Collect all language tags for which we have any data in CLDR.
|
||||
m := map[language.Tag]bool{}
|
||||
for _, lang := range b.data.Locales() {
|
||||
// We include all locales unconditionally to be consistent with en_US.
|
||||
// We want en_US, even though it has no data associated with it.
|
||||
|
||||
// TODO: put any of the languages for which no data exists at the end
|
||||
// of the index. This allows all components based on ICU to use that
|
||||
// as the cutoff point.
|
||||
// if x := data.RawLDML(lang); false ||
|
||||
// x.LocaleDisplayNames != nil ||
|
||||
// x.Characters != nil ||
|
||||
// x.Delimiters != nil ||
|
||||
// x.Measurement != nil ||
|
||||
// x.Dates != nil ||
|
||||
// x.Numbers != nil ||
|
||||
// x.Units != nil ||
|
||||
// x.ListPatterns != nil ||
|
||||
// x.Collations != nil ||
|
||||
// x.Segmentations != nil ||
|
||||
// x.Rbnf != nil ||
|
||||
// x.Annotations != nil ||
|
||||
// x.Metadata != nil {
|
||||
|
||||
// TODO: support POSIX natively, albeit non-standard.
|
||||
tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
|
||||
m[tag] = true
|
||||
// }
|
||||
}
|
||||
|
||||
// TODO: plural rules are also defined for the deprecated tags:
|
||||
// iw mo sh tl
|
||||
// Consider removing these as compact tags.
|
||||
|
||||
// Include locales for plural rules, which uses a different structure.
|
||||
for _, plurals := range b.supp.Plurals {
|
||||
for _, rules := range plurals.PluralRules {
|
||||
for _, lang := range strings.Split(rules.Locales, " ") {
|
||||
m[language.Make(lang)] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var coreTags []language.CompactCoreInfo
|
||||
var special []string
|
||||
|
||||
for t := range m {
|
||||
if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
|
||||
log.Fatalf("Unexpected extension %v in %v", x, t)
|
||||
}
|
||||
if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
|
||||
cci, ok := language.GetCompactCore(t)
|
||||
if !ok {
|
||||
log.Fatalf("Locale for non-basic language %q", t)
|
||||
}
|
||||
coreTags = append(coreTags, cci)
|
||||
} else {
|
||||
special = append(special, t.String())
|
||||
}
|
||||
}
|
||||
|
||||
w := b.w
|
||||
|
||||
sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] })
|
||||
sort.Strings(special)
|
||||
|
||||
w.WriteComment(`
|
||||
NumCompactTags is the number of common tags. The maximum tag is
|
||||
NumCompactTags-1.`)
|
||||
w.WriteConst("NumCompactTags", len(m))
|
||||
|
||||
fmt.Fprintln(w, "const (")
|
||||
for i, t := range coreTags {
|
||||
fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i)
|
||||
}
|
||||
for i, t := range special {
|
||||
fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags))
|
||||
}
|
||||
fmt.Fprintln(w, ")")
|
||||
|
||||
w.WriteVar("coreTags", coreTags)
|
||||
|
||||
w.WriteConst("specialTagsStr", strings.Join(special, " "))
|
||||
}
|
||||
|
||||
// ident returns the identifier used for the ID constant of the tag s: s with
// all hyphens removed, followed by "Index".
func ident(s string) string {
	const suffix = "Index"
	stripped := strings.Replace(s, "-", "", -1)
	return stripped + suffix
}
|
@ -0,0 +1,54 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/language"
|
||||
"golang.org/x/text/internal/language/compact"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
func main() {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatalf("DecodeZip: %v", err)
|
||||
}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("parents.go", "compact")
|
||||
|
||||
// Create parents table.
|
||||
type ID uint16
|
||||
parents := make([]ID, compact.NumCompactTags)
|
||||
for _, loc := range data.Locales() {
|
||||
tag := language.MustParse(loc)
|
||||
index, ok := compact.FromTag(tag)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
parentIndex := compact.ID(0) // und
|
||||
for p := tag.Parent(); p != language.Und; p = p.Parent() {
|
||||
if x, ok := compact.FromTag(p); ok {
|
||||
parentIndex = x
|
||||
break
|
||||
}
|
||||
}
|
||||
parents[index] = ID(parentIndex)
|
||||
}
|
||||
|
||||
w.WriteComment(`
|
||||
parents maps a compact index of a tag to the compact index of the parent of
|
||||
this tag.`)
|
||||
w.WriteVar("parents", parents)
|
||||
}
|
@ -0,0 +1,260 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_index.go -output tables.go
|
||||
//go:generate go run gen_parents.go
|
||||
|
||||
package compact
|
||||
|
||||
// TODO: Remove above NOTE after:
|
||||
// - verifying that tables are dropped correctly (most notably matcher tables).
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
|
||||
// specific language or locale. All language tag values are guaranteed to be
|
||||
// well-formed.
|
||||
type Tag struct {
|
||||
// NOTE: exported tags will become part of the public API.
|
||||
language ID
|
||||
locale ID
|
||||
full fullTag // always a language.Tag for now.
|
||||
}
|
||||
|
||||
// _und is the compact ID that IsRoot treats as the root language "und".
const _und = 0
|
||||
|
||||
type fullTag interface {
|
||||
IsRoot() bool
|
||||
Parent() language.Tag
|
||||
}
|
||||
|
||||
// Make a compact Tag from a fully specified internal language Tag.
|
||||
func Make(t language.Tag) (tag Tag) {
|
||||
if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
|
||||
if r, err := language.ParseRegion(region[:2]); err == nil {
|
||||
tFull := t
|
||||
t, _ = t.SetTypeForKey("rg", "")
|
||||
// TODO: should we not consider "va" for the language tag?
|
||||
var exact1, exact2 bool
|
||||
tag.language, exact1 = FromTag(t)
|
||||
t.RegionID = r
|
||||
tag.locale, exact2 = FromTag(t)
|
||||
if !exact1 || !exact2 {
|
||||
tag.full = tFull
|
||||
}
|
||||
return tag
|
||||
}
|
||||
}
|
||||
lang, ok := FromTag(t)
|
||||
tag.language = lang
|
||||
tag.locale = lang
|
||||
if !ok {
|
||||
tag.full = t
|
||||
}
|
||||
return tag
|
||||
}
|
||||
|
||||
// Tag returns an internal language Tag version of this tag.
|
||||
func (t Tag) Tag() language.Tag {
|
||||
if t.full != nil {
|
||||
return t.full.(language.Tag)
|
||||
}
|
||||
tag := t.language.Tag()
|
||||
if t.language != t.locale {
|
||||
loc := t.locale.Tag()
|
||||
tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
|
||||
}
|
||||
return tag
|
||||
}
|
||||
|
||||
// IsCompact reports whether this tag is fully defined in terms of ID.
|
||||
func (t *Tag) IsCompact() bool {
|
||||
return t.full == nil
|
||||
}
|
||||
|
||||
// MayHaveVariants reports whether a tag may have variants. If it returns false
|
||||
// it is guaranteed the tag does not have variants.
|
||||
func (t Tag) MayHaveVariants() bool {
|
||||
return t.full != nil || int(t.language) >= len(coreTags)
|
||||
}
|
||||
|
||||
// MayHaveExtensions reports whether a tag may have extensions. If it returns
|
||||
// false it is guaranteed the tag does not have them.
|
||||
func (t Tag) MayHaveExtensions() bool {
|
||||
return t.full != nil ||
|
||||
int(t.language) >= len(coreTags) ||
|
||||
t.language != t.locale
|
||||
}
|
||||
|
||||
// IsRoot returns true if t is equal to language "und".
|
||||
func (t Tag) IsRoot() bool {
|
||||
if t.full != nil {
|
||||
return t.full.IsRoot()
|
||||
}
|
||||
return t.language == _und
|
||||
}
|
||||
|
||||
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
|
||||
// specific language are substituted with fields from the parent language.
|
||||
// The parent for a language may change for newer versions of CLDR.
|
||||
func (t Tag) Parent() Tag {
|
||||
if t.full != nil {
|
||||
return Make(t.full.Parent())
|
||||
}
|
||||
if t.language != t.locale {
|
||||
// Simulate stripping -u-rg-xxxxxx
|
||||
return Tag{language: t.language, locale: t.language}
|
||||
}
|
||||
// TODO: use parent lookup table once cycle from internal package is
|
||||
// removed. Probably by internalizing the table and declaring this fast
|
||||
// enough.
|
||||
// lang := compactID(internal.Parent(uint16(t.language)))
|
||||
lang, _ := FromTag(t.language.Tag().Parent())
|
||||
return Tag{language: lang, locale: lang}
|
||||
}
|
||||
|
||||
// nextToken returns token t and the rest of the string. The first byte of s
// is skipped; t runs up to the next "-" and tail starts at that "-". If there
// is no further "-", t is all of s after its first byte and tail is empty.
// s must not be empty.
func nextToken(s string) (t, tail string) {
	rest := s[1:]
	sep := strings.Index(rest, "-")
	if sep == -1 {
		return rest, ""
	}
	return rest[:sep], rest[sep:]
}
|
||||
|
||||
// LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
|
||||
// for which data exists in the text repository.The index will change over time
|
||||
// and should not be stored in persistent storage. If t does not match a compact
|
||||
// index, exact will be false and the compact index will be returned for the
|
||||
// first match after repeatedly taking the Parent of t.
|
||||
func LanguageID(t Tag) (id ID, exact bool) {
|
||||
return t.language, t.full == nil
|
||||
}
|
||||
|
||||
// RegionalID returns the ID for the regional variant of this tag. This index is
|
||||
// used to indicate region-specific overrides, such as default currency, default
|
||||
// calendar and week data, default time cycle, and default measurement system
|
||||
// and unit preferences.
|
||||
//
|
||||
// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
|
||||
// settings for currency, number formatting, etc. The CompactIndex for this tag
|
||||
// will be that for en-GB, while the RegionalID will be the one corresponding to
|
||||
// en-US.
|
||||
func RegionalID(t Tag) (id ID, exact bool) {
|
||||
return t.locale, t.full == nil
|
||||
}
|
||||
|
||||
// LanguageTag returns t stripped of regional variant indicators.
|
||||
//
|
||||
// At the moment this means it is stripped of a regional and variant subtag "rg"
|
||||
// and "va" in the "u" extension.
|
||||
func (t Tag) LanguageTag() Tag {
|
||||
if t.full == nil {
|
||||
return Tag{language: t.language, locale: t.language}
|
||||
}
|
||||
tt := t.Tag()
|
||||
tt.SetTypeForKey("rg", "")
|
||||
tt.SetTypeForKey("va", "")
|
||||
return Make(tt)
|
||||
}
|
||||
|
||||
// RegionalTag returns the regional variant of the tag.
|
||||
//
|
||||
// At the moment this means that the region is set from the regional subtag
|
||||
// "rg" in the "u" extension.
|
||||
func (t Tag) RegionalTag() Tag {
|
||||
rt := Tag{language: t.locale, locale: t.locale}
|
||||
if t.full == nil {
|
||||
return rt
|
||||
}
|
||||
b := language.Builder{}
|
||||
tag := t.Tag()
|
||||
// tag, _ = tag.SetTypeForKey("rg", "")
|
||||
b.SetTag(t.locale.Tag())
|
||||
if v := tag.Variants(); v != "" {
|
||||
for _, v := range strings.Split(v, "-") {
|
||||
b.AddVariant(v)
|
||||
}
|
||||
}
|
||||
for _, e := range tag.Extensions() {
|
||||
b.AddExt(e)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// FromTag reports closest matching ID for an internal language Tag.
//
// The exact result is false whenever part of t had to be dropped or replaced
// (extensions, variants, or a fallback to a maximized or parent tag) before a
// match was found. If nothing matches, ID 0 is returned with exact == false.
func FromTag(t language.Tag) (id ID, exact bool) {
	// TODO: perhaps give more frequent tags a lower index.
	// TODO: we could make the indexes stable. This will exclude some
	// possibilities for optimization, so don't do this quite yet.
	exact = true

	b, s, r := t.Raw()
	if t.HasString() {
		if t.IsPrivateUse() {
			// We have no entries for user-defined tags.
			return 0, false
		}
		hasExtra := false
		if t.HasVariants() {
			if t.HasExtensions() {
				// Rebuild the tag from its core components plus variants,
				// leaving the extensions out; the match is then inexact.
				build := language.Builder{}
				build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
				build.AddVariant(t.Variants())
				exact = false
				t = build.Make()
			}
			hasExtra = true
		} else if _, ok := t.Extension('u'); ok {
			// TODO: va may mean something else. Consider not considering it.
			// Strip all but the 'va' entry.
			old := t
			variant := t.TypeForKey("va")
			t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
			if variant != "" {
				t, _ = t.SetTypeForKey("va", variant)
				hasExtra = true
			}
			// The match stays exact only if stripping changed nothing.
			exact = old == t
		} else {
			// Neither variants nor a -u extension: whatever else the tag
			// string carries is not matched on.
			exact = false
		}
		if hasExtra {
			// We have some variants.
			for i, s := range specialTags {
				if s == t {
					// Special tags are numbered after all core tags.
					return ID(i + len(coreTags)), exact
				}
			}
			exact = false
		}
	}
	if x, ok := getCoreIndex(t); ok {
		return x, exact
	}
	// Everything below is a fallback and therefore never exact.
	exact = false
	if r != 0 && s == 0 {
		// Deal with cases where an extra script is inserted for the region.
		// Note that this t shadows the outer one: the parent walk below
		// starts from the original, non-maximized tag.
		t, _ := t.Maximize()
		if x, ok := getCoreIndex(t); ok {
			return x, exact
		}
	}
	for t = t.Parent(); t != root; t = t.Parent() {
		// No variants specified: just compare core components.
		// The key has the form lllssrrr, where l, s, and r are nibbles for
		// respectively the langID, scriptID, and regionID.
		if x, ok := getCoreIndex(t); ok {
			return x, exact
		}
	}
	return 0, exact
}
|
||||
|
||||
// root is the zero language.Tag; FromTag stops walking the parent chain when
// it reaches this value.
var root language.Tag
|
@ -0,0 +1,120 @@
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package compact
|
||||
|
||||
// parents maps a compact index of a tag to the compact index of the parent of
|
||||
// this tag.
|
||||
var parents = []ID{ // 775 elements
|
||||
// Entry 0 - 3F
|
||||
0x0000, 0x0000, 0x0001, 0x0001, 0x0000, 0x0004, 0x0000, 0x0006,
|
||||
0x0000, 0x0008, 0x0000, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
|
||||
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
|
||||
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
|
||||
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x0000,
|
||||
0x0000, 0x0028, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x0000,
|
||||
0x002f, 0x002e, 0x002e, 0x0000, 0x0033, 0x0000, 0x0035, 0x0000,
|
||||
0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x0000, 0x003e,
|
||||
// Entry 40 - 7F
|
||||
0x0000, 0x0040, 0x0040, 0x0000, 0x0043, 0x0043, 0x0000, 0x0046,
|
||||
0x0000, 0x0048, 0x0000, 0x0000, 0x004b, 0x004a, 0x004a, 0x0000,
|
||||
0x004f, 0x004f, 0x004f, 0x004f, 0x0000, 0x0054, 0x0054, 0x0000,
|
||||
0x0057, 0x0000, 0x0059, 0x0000, 0x005b, 0x0000, 0x005d, 0x005d,
|
||||
0x0000, 0x0060, 0x0000, 0x0062, 0x0000, 0x0064, 0x0000, 0x0066,
|
||||
0x0066, 0x0000, 0x0069, 0x0000, 0x006b, 0x006b, 0x006b, 0x006b,
|
||||
0x006b, 0x006b, 0x006b, 0x0000, 0x0073, 0x0000, 0x0075, 0x0000,
|
||||
0x0077, 0x0000, 0x0000, 0x007a, 0x0000, 0x007c, 0x0000, 0x007e,
|
||||
// Entry 80 - BF
|
||||
0x0000, 0x0080, 0x0080, 0x0000, 0x0083, 0x0083, 0x0000, 0x0086,
|
||||
0x0087, 0x0087, 0x0087, 0x0086, 0x0088, 0x0087, 0x0087, 0x0087,
|
||||
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088,
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087, 0x0088, 0x0087,
|
||||
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0086,
|
||||
// Entry C0 - FF
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087,
|
||||
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0086, 0x0087,
|
||||
0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0000,
|
||||
0x00ef, 0x0000, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2,
|
||||
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f1, 0x00f1,
|
||||
// Entry 100 - 13F
|
||||
0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1,
|
||||
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x0000, 0x010e,
|
||||
0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0114, 0x0000,
|
||||
0x0117, 0x0117, 0x0117, 0x0117, 0x0000, 0x011c, 0x0000, 0x011e,
|
||||
0x0000, 0x0120, 0x0120, 0x0000, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
// Entry 140 - 17F
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
|
||||
0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x015c, 0x015c,
|
||||
0x0000, 0x0160, 0x0000, 0x0000, 0x0163, 0x0000, 0x0165, 0x0000,
|
||||
0x0167, 0x0167, 0x0167, 0x0000, 0x016b, 0x0000, 0x016d, 0x0000,
|
||||
0x016f, 0x0000, 0x0171, 0x0171, 0x0000, 0x0174, 0x0000, 0x0176,
|
||||
0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
|
||||
// Entry 180 - 1BF
|
||||
0x0000, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0184, 0x0184,
|
||||
0x0184, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x018e,
|
||||
0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x0000, 0x0195, 0x0000,
|
||||
0x0197, 0x0000, 0x0000, 0x019a, 0x0000, 0x0000, 0x019d, 0x0000,
|
||||
0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
|
||||
0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x0000, 0x01ad, 0x0000,
|
||||
0x01af, 0x0000, 0x01b1, 0x01b1, 0x0000, 0x01b4, 0x0000, 0x01b6,
|
||||
0x0000, 0x01b8, 0x0000, 0x01ba, 0x0000, 0x01bc, 0x0000, 0x0000,
|
||||
// Entry 1C0 - 1FF
|
||||
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x0000,
|
||||
0x01c7, 0x0000, 0x01c9, 0x0000, 0x01cb, 0x01cb, 0x01cb, 0x01cb,
|
||||
0x0000, 0x01d0, 0x0000, 0x01d2, 0x01d2, 0x0000, 0x01d5, 0x0000,
|
||||
0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x0000, 0x01dd, 0x0000,
|
||||
0x01df, 0x01df, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
|
||||
0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
|
||||
0x0000, 0x01f0, 0x0000, 0x0000, 0x01f3, 0x0000, 0x01f5, 0x01f5,
|
||||
0x01f5, 0x0000, 0x01f9, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x0000,
|
||||
// Entry 200 - 23F
|
||||
0x01ff, 0x0000, 0x0000, 0x0202, 0x0000, 0x0204, 0x0204, 0x0000,
|
||||
0x0207, 0x0000, 0x0209, 0x0209, 0x0000, 0x020c, 0x020c, 0x0000,
|
||||
0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x0000,
|
||||
0x0217, 0x0000, 0x0219, 0x0000, 0x021b, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0221, 0x0000, 0x0000, 0x0224, 0x0000, 0x0226,
|
||||
0x0226, 0x0000, 0x0229, 0x0000, 0x022b, 0x022b, 0x0000, 0x0000,
|
||||
0x022f, 0x022e, 0x022e, 0x0000, 0x0000, 0x0234, 0x0000, 0x0236,
|
||||
0x0000, 0x0238, 0x0000, 0x0244, 0x023a, 0x0244, 0x0244, 0x0244,
|
||||
// Entry 240 - 27F
|
||||
0x0244, 0x0244, 0x0244, 0x0244, 0x023a, 0x0244, 0x0244, 0x0000,
|
||||
0x0247, 0x0247, 0x0247, 0x0000, 0x024b, 0x0000, 0x024d, 0x0000,
|
||||
0x024f, 0x024f, 0x0000, 0x0252, 0x0000, 0x0254, 0x0254, 0x0254,
|
||||
0x0254, 0x0254, 0x0254, 0x0000, 0x025b, 0x0000, 0x025d, 0x0000,
|
||||
0x025f, 0x0000, 0x0261, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
|
||||
0x0000, 0x0268, 0x0268, 0x0268, 0x0000, 0x026c, 0x0000, 0x026e,
|
||||
0x0000, 0x0270, 0x0000, 0x0000, 0x0000, 0x0274, 0x0273, 0x0273,
|
||||
0x0000, 0x0278, 0x0000, 0x027a, 0x0000, 0x027c, 0x0000, 0x0000,
|
||||
// Entry 280 - 2BF
|
||||
0x0000, 0x0000, 0x0281, 0x0000, 0x0000, 0x0284, 0x0000, 0x0286,
|
||||
0x0286, 0x0286, 0x0286, 0x0000, 0x028b, 0x028b, 0x028b, 0x0000,
|
||||
0x028f, 0x028f, 0x028f, 0x028f, 0x028f, 0x0000, 0x0295, 0x0295,
|
||||
0x0295, 0x0295, 0x0000, 0x0000, 0x0000, 0x0000, 0x029d, 0x029d,
|
||||
0x029d, 0x0000, 0x02a1, 0x02a1, 0x02a1, 0x02a1, 0x0000, 0x0000,
|
||||
0x02a7, 0x02a7, 0x02a7, 0x02a7, 0x0000, 0x02ac, 0x0000, 0x02ae,
|
||||
0x02ae, 0x0000, 0x02b1, 0x0000, 0x02b3, 0x0000, 0x02b5, 0x02b5,
|
||||
0x0000, 0x0000, 0x02b9, 0x0000, 0x0000, 0x0000, 0x02bd, 0x0000,
|
||||
// Entry 2C0 - 2FF
|
||||
0x02bf, 0x02bf, 0x0000, 0x0000, 0x02c3, 0x0000, 0x02c5, 0x0000,
|
||||
0x02c7, 0x0000, 0x02c9, 0x0000, 0x02cb, 0x0000, 0x02cd, 0x02cd,
|
||||
0x0000, 0x0000, 0x02d1, 0x0000, 0x02d3, 0x02d0, 0x02d0, 0x0000,
|
||||
0x0000, 0x02d8, 0x02d7, 0x02d7, 0x0000, 0x0000, 0x02dd, 0x0000,
|
||||
0x02df, 0x0000, 0x02e1, 0x0000, 0x0000, 0x02e4, 0x0000, 0x02e6,
|
||||
0x0000, 0x0000, 0x02e9, 0x0000, 0x02eb, 0x0000, 0x02ed, 0x0000,
|
||||
0x02ef, 0x02ef, 0x0000, 0x0000, 0x02f3, 0x02f2, 0x02f2, 0x0000,
|
||||
0x02f7, 0x0000, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x0000,
|
||||
// Entry 300 - 33F
|
||||
0x02ff, 0x0300, 0x02ff, 0x0000, 0x0303, 0x0051, 0x00e6,
|
||||
} // Size: 1574 bytes
|
||||
|
||||
// Total table size 1574 bytes (1KiB); checksum: 895AAF0B
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,91 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package compact
|
||||
|
||||
var (
|
||||
und = Tag{}
|
||||
|
||||
Und Tag = Tag{}
|
||||
|
||||
Afrikaans Tag = Tag{language: afIndex, locale: afIndex}
|
||||
Amharic Tag = Tag{language: amIndex, locale: amIndex}
|
||||
Arabic Tag = Tag{language: arIndex, locale: arIndex}
|
||||
ModernStandardArabic Tag = Tag{language: ar001Index, locale: ar001Index}
|
||||
Azerbaijani Tag = Tag{language: azIndex, locale: azIndex}
|
||||
Bulgarian Tag = Tag{language: bgIndex, locale: bgIndex}
|
||||
Bengali Tag = Tag{language: bnIndex, locale: bnIndex}
|
||||
Catalan Tag = Tag{language: caIndex, locale: caIndex}
|
||||
Czech Tag = Tag{language: csIndex, locale: csIndex}
|
||||
Danish Tag = Tag{language: daIndex, locale: daIndex}
|
||||
German Tag = Tag{language: deIndex, locale: deIndex}
|
||||
Greek Tag = Tag{language: elIndex, locale: elIndex}
|
||||
English Tag = Tag{language: enIndex, locale: enIndex}
|
||||
AmericanEnglish Tag = Tag{language: enUSIndex, locale: enUSIndex}
|
||||
BritishEnglish Tag = Tag{language: enGBIndex, locale: enGBIndex}
|
||||
Spanish Tag = Tag{language: esIndex, locale: esIndex}
|
||||
EuropeanSpanish Tag = Tag{language: esESIndex, locale: esESIndex}
|
||||
LatinAmericanSpanish Tag = Tag{language: es419Index, locale: es419Index}
|
||||
Estonian Tag = Tag{language: etIndex, locale: etIndex}
|
||||
Persian Tag = Tag{language: faIndex, locale: faIndex}
|
||||
Finnish Tag = Tag{language: fiIndex, locale: fiIndex}
|
||||
Filipino Tag = Tag{language: filIndex, locale: filIndex}
|
||||
French Tag = Tag{language: frIndex, locale: frIndex}
|
||||
CanadianFrench Tag = Tag{language: frCAIndex, locale: frCAIndex}
|
||||
Gujarati Tag = Tag{language: guIndex, locale: guIndex}
|
||||
Hebrew Tag = Tag{language: heIndex, locale: heIndex}
|
||||
Hindi Tag = Tag{language: hiIndex, locale: hiIndex}
|
||||
Croatian Tag = Tag{language: hrIndex, locale: hrIndex}
|
||||
Hungarian Tag = Tag{language: huIndex, locale: huIndex}
|
||||
Armenian Tag = Tag{language: hyIndex, locale: hyIndex}
|
||||
Indonesian Tag = Tag{language: idIndex, locale: idIndex}
|
||||
Icelandic Tag = Tag{language: isIndex, locale: isIndex}
|
||||
Italian Tag = Tag{language: itIndex, locale: itIndex}
|
||||
Japanese Tag = Tag{language: jaIndex, locale: jaIndex}
|
||||
Georgian Tag = Tag{language: kaIndex, locale: kaIndex}
|
||||
Kazakh Tag = Tag{language: kkIndex, locale: kkIndex}
|
||||
Khmer Tag = Tag{language: kmIndex, locale: kmIndex}
|
||||
Kannada Tag = Tag{language: knIndex, locale: knIndex}
|
||||
Korean Tag = Tag{language: koIndex, locale: koIndex}
|
||||
Kirghiz Tag = Tag{language: kyIndex, locale: kyIndex}
|
||||
Lao Tag = Tag{language: loIndex, locale: loIndex}
|
||||
Lithuanian Tag = Tag{language: ltIndex, locale: ltIndex}
|
||||
Latvian Tag = Tag{language: lvIndex, locale: lvIndex}
|
||||
Macedonian Tag = Tag{language: mkIndex, locale: mkIndex}
|
||||
Malayalam Tag = Tag{language: mlIndex, locale: mlIndex}
|
||||
Mongolian Tag = Tag{language: mnIndex, locale: mnIndex}
|
||||
Marathi Tag = Tag{language: mrIndex, locale: mrIndex}
|
||||
Malay Tag = Tag{language: msIndex, locale: msIndex}
|
||||
Burmese Tag = Tag{language: myIndex, locale: myIndex}
|
||||
Nepali Tag = Tag{language: neIndex, locale: neIndex}
|
||||
Dutch Tag = Tag{language: nlIndex, locale: nlIndex}
|
||||
Norwegian Tag = Tag{language: noIndex, locale: noIndex}
|
||||
Punjabi Tag = Tag{language: paIndex, locale: paIndex}
|
||||
Polish Tag = Tag{language: plIndex, locale: plIndex}
|
||||
Portuguese Tag = Tag{language: ptIndex, locale: ptIndex}
|
||||
BrazilianPortuguese Tag = Tag{language: ptBRIndex, locale: ptBRIndex}
|
||||
EuropeanPortuguese Tag = Tag{language: ptPTIndex, locale: ptPTIndex}
|
||||
Romanian Tag = Tag{language: roIndex, locale: roIndex}
|
||||
Russian Tag = Tag{language: ruIndex, locale: ruIndex}
|
||||
Sinhala Tag = Tag{language: siIndex, locale: siIndex}
|
||||
Slovak Tag = Tag{language: skIndex, locale: skIndex}
|
||||
Slovenian Tag = Tag{language: slIndex, locale: slIndex}
|
||||
Albanian Tag = Tag{language: sqIndex, locale: sqIndex}
|
||||
Serbian Tag = Tag{language: srIndex, locale: srIndex}
|
||||
SerbianLatin Tag = Tag{language: srLatnIndex, locale: srLatnIndex}
|
||||
Swedish Tag = Tag{language: svIndex, locale: svIndex}
|
||||
Swahili Tag = Tag{language: swIndex, locale: swIndex}
|
||||
Tamil Tag = Tag{language: taIndex, locale: taIndex}
|
||||
Telugu Tag = Tag{language: teIndex, locale: teIndex}
|
||||
Thai Tag = Tag{language: thIndex, locale: thIndex}
|
||||
Turkish Tag = Tag{language: trIndex, locale: trIndex}
|
||||
Ukrainian Tag = Tag{language: ukIndex, locale: ukIndex}
|
||||
Urdu Tag = Tag{language: urIndex, locale: urIndex}
|
||||
Uzbek Tag = Tag{language: uzIndex, locale: uzIndex}
|
||||
Vietnamese Tag = Tag{language: viIndex, locale: viIndex}
|
||||
Chinese Tag = Tag{language: zhIndex, locale: zhIndex}
|
||||
SimplifiedChinese Tag = Tag{language: zhHansIndex, locale: zhHansIndex}
|
||||
TraditionalChinese Tag = Tag{language: zhHantIndex, locale: zhHantIndex}
|
||||
Zulu Tag = Tag{language: zuIndex, locale: zuIndex}
|
||||
)
|
@ -0,0 +1,167 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A Builder allows constructing a Tag from individual components.
// Its main user is Compose in the top-level language package.
type Builder struct {
	// Tag is the base tag Make starts from; SetTag fills in its language,
	// script and region.
	Tag Tag

	private    string   // the x extension
	variants   []string // variant subtags, stored without a leading '-'
	extensions []string // non-private extensions, one entry per extension type
}
|
||||
|
||||
// Make returns a new Tag from the current settings.
|
||||
func (b *Builder) Make() Tag {
|
||||
t := b.Tag
|
||||
|
||||
if len(b.extensions) > 0 || len(b.variants) > 0 {
|
||||
sort.Sort(sortVariants(b.variants))
|
||||
sort.Strings(b.extensions)
|
||||
|
||||
if b.private != "" {
|
||||
b.extensions = append(b.extensions, b.private)
|
||||
}
|
||||
n := maxCoreSize + tokenLen(b.variants...) + tokenLen(b.extensions...)
|
||||
buf := make([]byte, n)
|
||||
p := t.genCoreBytes(buf)
|
||||
t.pVariant = byte(p)
|
||||
p += appendTokens(buf[p:], b.variants...)
|
||||
t.pExt = uint16(p)
|
||||
p += appendTokens(buf[p:], b.extensions...)
|
||||
t.str = string(buf[:p])
|
||||
// We may not always need to remake the string, but when or when not
|
||||
// to do so is rather tricky.
|
||||
scan := makeScanner(buf[:p])
|
||||
t, _ = parse(&scan, "")
|
||||
return t
|
||||
|
||||
} else if b.private != "" {
|
||||
t.str = b.private
|
||||
t.RemakeString()
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// SetTag copies all the settings from a given Tag. Any previously set values
|
||||
// are discarded.
|
||||
func (b *Builder) SetTag(t Tag) {
|
||||
b.Tag.LangID = t.LangID
|
||||
b.Tag.RegionID = t.RegionID
|
||||
b.Tag.ScriptID = t.ScriptID
|
||||
// TODO: optimize
|
||||
b.variants = b.variants[:0]
|
||||
if variants := t.Variants(); variants != "" {
|
||||
for _, vr := range strings.Split(variants[1:], "-") {
|
||||
b.variants = append(b.variants, vr)
|
||||
}
|
||||
}
|
||||
b.extensions, b.private = b.extensions[:0], ""
|
||||
for _, e := range t.Extensions() {
|
||||
b.AddExt(e)
|
||||
}
|
||||
}
|
||||
|
||||
// AddExt adds extension e to the tag. e must be a valid extension as returned
|
||||
// by Tag.Extension. If the extension already exists, it will be discarded,
|
||||
// except for a -u extension, where non-existing key-type pairs will added.
|
||||
func (b *Builder) AddExt(e string) {
|
||||
if e[0] == 'x' {
|
||||
if b.private == "" {
|
||||
b.private = e
|
||||
}
|
||||
return
|
||||
}
|
||||
for i, s := range b.extensions {
|
||||
if s[0] == e[0] {
|
||||
if e[0] == 'u' {
|
||||
b.extensions[i] += e[1:]
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
b.extensions = append(b.extensions, e)
|
||||
}
|
||||
|
||||
// SetExt sets the extension e to the tag. e must be a valid extension as
|
||||
// returned by Tag.Extension. If the extension already exists, it will be
|
||||
// overwritten, except for a -u extension, where the individual key-type pairs
|
||||
// will be set.
|
||||
func (b *Builder) SetExt(e string) {
|
||||
if e[0] == 'x' {
|
||||
b.private = e
|
||||
return
|
||||
}
|
||||
for i, s := range b.extensions {
|
||||
if s[0] == e[0] {
|
||||
if e[0] == 'u' {
|
||||
b.extensions[i] = e + s[1:]
|
||||
} else {
|
||||
b.extensions[i] = e
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
b.extensions = append(b.extensions, e)
|
||||
}
|
||||
|
||||
// AddVariant adds any number of variants.
|
||||
func (b *Builder) AddVariant(v ...string) {
|
||||
for _, v := range v {
|
||||
if v != "" {
|
||||
b.variants = append(b.variants, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ClearVariants removes any variants previously added, including those
// copied from a Tag in SetTag.
func (b *Builder) ClearVariants() {
	// Re-slice to length zero so the existing capacity can be reused.
	b.variants = b.variants[:0]
}
|
||||
|
||||
// ClearExtensions removes any extensions previously added, including those
|
||||
// copied from a Tag in SetTag.
|
||||
func (b *Builder) ClearExtensions() {
|
||||
b.private = ""
|
||||
b.extensions = b.extensions[:0]
|
||||
}
|
||||
|
||||
// tokenLen returns the number of bytes needed to write every token preceded
// by a one-byte separator.
func tokenLen(token ...string) (n int) {
	n = len(token) // one separator byte per token
	for i := range token {
		n += len(token[i])
	}
	return n
}
|
||||
|
||||
// appendTokens writes each token into b, preceded by a '-', starting at
// b[0], and returns the number of bytes written. b must be large enough to
// hold the output; see tokenLen.
func appendTokens(b []byte, token ...string) int {
	pos := 0
	for i := range token {
		tok := token[i]
		b[pos] = '-'
		pos++
		copy(b[pos:], tok)
		pos += len(tok)
	}
	return pos
}
|
||||
|
||||
type sortVariants []string
|
||||
|
||||
func (s sortVariants) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s sortVariants) Swap(i, j int) {
|
||||
s[j], s[i] = s[i], s[j]
|
||||
}
|
||||
|
||||
func (s sortVariants) Less(i, j int) bool {
|
||||
return variantIndex[s[i]] < variantIndex[s[j]]
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
// BaseLanguages returns the list of all supported base languages. It generates
|
||||
// the list by traversing the internal structures.
|
||||
func BaseLanguages() []Language {
|
||||
base := make([]Language, 0, NumLanguages)
|
||||
for i := 0; i < langNoIndexOffset; i++ {
|
||||
// We included "und" already for the value 0.
|
||||
if i != nonCanonicalUnd {
|
||||
base = append(base, Language(i))
|
||||
}
|
||||
}
|
||||
i := langNoIndexOffset
|
||||
for _, v := range langNoIndex {
|
||||
for k := 0; k < 8; k++ {
|
||||
if v&1 == 1 {
|
||||
base = append(base, Language(i))
|
||||
}
|
||||
v >>= 1
|
||||
i++
|
||||
}
|
||||
}
|
||||
return base
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,20 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This file contains code common to the maketables.go and the package code.
|
||||
|
||||
// AliasType is the type of an alias in AliasMap.
type AliasType int8

// AliasTypeUnknown is the AliasType value -1, outside the range of the
// enumerated alias types.
const AliasTypeUnknown AliasType = -1

// The enumerated alias types, numbered consecutively from zero.
const (
	Deprecated AliasType = iota
	Macro
	Legacy
)
|
@ -0,0 +1,596 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_common.go -output tables.go
|
||||
|
||||
package language // import "golang.org/x/text/internal/language"
|
||||
|
||||
// TODO: Remove above NOTE after:
|
||||
// - verifying that tables are dropped correctly (most notably matcher tables).
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// maxCoreSize is the maximum size of a BCP 47 tag without variants and
// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.
const maxCoreSize = 12

// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
// is large enough to hold at least 99% of the BCP 47 tags.
const max99thPercentileSize = 32

// maxSimpleUExtensionSize is the maximum size of a -u extension with one
// key-type pair. Equals len("-u-") + key (2) + dash + max value (8).
const maxSimpleUExtensionSize = 14
|
||||
|
||||
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed. The zero value of Tag is Und.
type Tag struct {
	// TODO: the following fields have the form TagTypeID. This name is chosen
	// to allow refactoring the public package without conflicting with its
	// Base, Script, and Region methods. Once the transition is fully completed
	// the ID can be stripped from the name.

	LangID   Language
	RegionID Region
	// TODO: we will soon run out of positions for ScriptID. Idea: instead of
	// storing lang, region, and ScriptID codes, store only the compact index and
	// have a lookup table from this code to its expansion. This greatly speeds
	// up table lookup and speeds up common variant cases.
	// This will also immediately free up 3 extra bytes. Also, the pVariant
	// field can now be moved to the lookup table, as the compact index uniquely
	// determines the offset of a possible variant.
	ScriptID Script
	pVariant byte   // offset in str, includes preceding '-'
	pExt     uint16 // offset of first extension, includes preceding '-'

	// str is the string representation of the Tag. It will only be used if the
	// tag has variants or extensions.
	str string
}
|
||||
|
||||
// Make is a convenience wrapper for Parse that omits the error.
|
||||
// In case of an error, a sensible default is returned.
|
||||
func Make(s string) Tag {
|
||||
t, _ := Parse(s)
|
||||
return t
|
||||
}
|
||||
|
||||
// Raw returns the raw base language, script and region, without making an
|
||||
// attempt to infer their values.
|
||||
// TODO: consider removing
|
||||
func (t Tag) Raw() (b Language, s Script, r Region) {
|
||||
return t.LangID, t.ScriptID, t.RegionID
|
||||
}
|
||||
|
||||
// equalTags compares language, script and region subtags only.
|
||||
func (t Tag) equalTags(a Tag) bool {
|
||||
return t.LangID == a.LangID && t.ScriptID == a.ScriptID && t.RegionID == a.RegionID
|
||||
}
|
||||
|
||||
// IsRoot returns true if t is equal to language "und".
|
||||
func (t Tag) IsRoot() bool {
|
||||
if int(t.pVariant) < len(t.str) {
|
||||
return false
|
||||
}
|
||||
return t.equalTags(Und)
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether the Tag consists solely of an IsPrivateUse use
|
||||
// tag.
|
||||
func (t Tag) IsPrivateUse() bool {
|
||||
return t.str != "" && t.pVariant == 0
|
||||
}
|
||||
|
||||
// RemakeString is used to update t.str in case lang, script or region changed.
|
||||
// It is assumed that pExt and pVariant still point to the start of the
|
||||
// respective parts.
|
||||
func (t *Tag) RemakeString() {
|
||||
if t.str == "" {
|
||||
return
|
||||
}
|
||||
extra := t.str[t.pVariant:]
|
||||
if t.pVariant > 0 {
|
||||
extra = extra[1:]
|
||||
}
|
||||
if t.equalTags(Und) && strings.HasPrefix(extra, "x-") {
|
||||
t.str = extra
|
||||
t.pVariant = 0
|
||||
t.pExt = 0
|
||||
return
|
||||
}
|
||||
var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
|
||||
b := buf[:t.genCoreBytes(buf[:])]
|
||||
if extra != "" {
|
||||
diff := len(b) - int(t.pVariant)
|
||||
b = append(b, '-')
|
||||
b = append(b, extra...)
|
||||
t.pVariant = uint8(int(t.pVariant) + diff)
|
||||
t.pExt = uint16(int(t.pExt) + diff)
|
||||
} else {
|
||||
t.pVariant = uint8(len(b))
|
||||
t.pExt = uint16(len(b))
|
||||
}
|
||||
t.str = string(b)
|
||||
}
|
||||
|
||||
// genCoreBytes writes a string for the base languages, script and region tags
|
||||
// to the given buffer and returns the number of bytes written. It will never
|
||||
// write more than maxCoreSize bytes.
|
||||
func (t *Tag) genCoreBytes(buf []byte) int {
|
||||
n := t.LangID.StringToBuf(buf[:])
|
||||
if t.ScriptID != 0 {
|
||||
n += copy(buf[n:], "-")
|
||||
n += copy(buf[n:], t.ScriptID.String())
|
||||
}
|
||||
if t.RegionID != 0 {
|
||||
n += copy(buf[n:], "-")
|
||||
n += copy(buf[n:], t.RegionID.String())
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// String returns the canonical string representation of the language tag.
|
||||
func (t Tag) String() string {
|
||||
if t.str != "" {
|
||||
return t.str
|
||||
}
|
||||
if t.ScriptID == 0 && t.RegionID == 0 {
|
||||
return t.LangID.String()
|
||||
}
|
||||
buf := [maxCoreSize]byte{}
|
||||
return string(buf[:t.genCoreBytes(buf[:])])
|
||||
}
|
||||
|
||||
// MarshalText implements encoding.TextMarshaler.
|
||||
func (t Tag) MarshalText() (text []byte, err error) {
|
||||
if t.str != "" {
|
||||
text = append(text, t.str...)
|
||||
} else if t.ScriptID == 0 && t.RegionID == 0 {
|
||||
text = append(text, t.LangID.String()...)
|
||||
} else {
|
||||
buf := [maxCoreSize]byte{}
|
||||
text = buf[:t.genCoreBytes(buf[:])]
|
||||
}
|
||||
return text, nil
|
||||
}
|
||||
|
||||
// UnmarshalText implements encoding.TextUnmarshaler.
|
||||
func (t *Tag) UnmarshalText(text []byte) error {
|
||||
tag, err := Parse(string(text))
|
||||
*t = tag
|
||||
return err
|
||||
}
|
||||
|
||||
// Variants returns the part of the tag holding all variants or the empty string
|
||||
// if there are no variants defined.
|
||||
func (t Tag) Variants() string {
|
||||
if t.pVariant == 0 {
|
||||
return ""
|
||||
}
|
||||
return t.str[t.pVariant:t.pExt]
|
||||
}
|
||||
|
||||
// VariantOrPrivateUseTags returns variants or private use tags.
|
||||
func (t Tag) VariantOrPrivateUseTags() string {
|
||||
if t.pExt > 0 {
|
||||
return t.str[t.pVariant:t.pExt]
|
||||
}
|
||||
return t.str[t.pVariant:]
|
||||
}
|
||||
|
||||
// HasString reports whether this tag defines more than just the raw
|
||||
// components.
|
||||
func (t Tag) HasString() bool {
|
||||
return t.str != ""
|
||||
}
|
||||
|
||||
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
//
// For a tag with variants or extensions the parent is the same tag with those
// stripped (and with the script dropped too when it is the one addTags yields
// for the bare language and no region is set).
func (t Tag) Parent() Tag {
	if t.str != "" {
		// Strip the variants and extensions.
		b, s, r := t.Raw()
		t = Tag{LangID: b, ScriptID: s, RegionID: r}
		if t.RegionID == 0 && t.ScriptID != 0 && t.LangID != 0 {
			// Errors from addTags are ignored here and below; only the
			// resulting script is compared.
			base, _ := addTags(Tag{LangID: t.LangID})
			if base.ScriptID == t.ScriptID {
				return Tag{LangID: t.LangID}
			}
		}
		return t
	}
	if t.LangID != 0 {
		if t.RegionID != 0 {
			// Determine the script to match on: the explicit one, or the one
			// addTags fills in for t.
			maxScript := t.ScriptID
			if maxScript == 0 {
				max, _ := addTags(t)
				maxScript = max.ScriptID
			}

			// Look for an explicit parent entry for this language/script
			// that lists the tag's region.
			for i := range parents {
				if Language(parents[i].lang) == t.LangID && Script(parents[i].maxScript) == maxScript {
					for _, r := range parents[i].fromRegion {
						if Region(r) == t.RegionID {
							return Tag{
								LangID:   t.LangID,
								ScriptID: Script(parents[i].script),
								RegionID: Region(parents[i].toRegion),
							}
						}
					}
				}
			}

			// Strip the script if it is the default one.
			base, _ := addTags(Tag{LangID: t.LangID})
			if base.ScriptID != maxScript {
				return Tag{LangID: t.LangID, ScriptID: maxScript}
			}
			return Tag{LangID: t.LangID}
		} else if t.ScriptID != 0 {
			// The parent for a base-script pair with a non-default script is
			// "und" instead of the base language.
			base, _ := addTags(Tag{LangID: t.LangID})
			if base.ScriptID != t.ScriptID {
				return Und
			}
			return Tag{LangID: t.LangID}
		}
	}
	return Und
}
|
||||
|
||||
// ParseExtension parses s as an extension and returns it on success.
|
||||
func ParseExtension(s string) (ext string, err error) {
|
||||
scan := makeScannerString(s)
|
||||
var end int
|
||||
if n := len(scan.token); n != 1 {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
scan.toLower(0, len(scan.b))
|
||||
end = parseExtension(&scan)
|
||||
if end != len(s) {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
return string(scan.b), nil
|
||||
}
|
||||
|
||||
// HasVariants reports whether t has variants.
|
||||
func (t Tag) HasVariants() bool {
|
||||
return uint16(t.pVariant) < t.pExt
|
||||
}
|
||||
|
||||
// HasExtensions reports whether t has extensions.
|
||||
func (t Tag) HasExtensions() bool {
|
||||
return int(t.pExt) < len(t.str)
|
||||
}
|
||||
|
||||
// Extension returns the extension of type x for tag t. It will return
|
||||
// false for ok if t does not have the requested extension. The returned
|
||||
// extension will be invalid in this case.
|
||||
func (t Tag) Extension(x byte) (ext string, ok bool) {
|
||||
for i := int(t.pExt); i < len(t.str)-1; {
|
||||
var ext string
|
||||
i, ext = getExtension(t.str, i)
|
||||
if ext[0] == x {
|
||||
return ext, true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Extensions returns all extensions of t.
|
||||
func (t Tag) Extensions() []string {
|
||||
e := []string{}
|
||||
for i := int(t.pExt); i < len(t.str)-1; {
|
||||
var ext string
|
||||
i, ext = getExtension(t.str, i)
|
||||
e = append(e, ext)
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// TypeForKey returns the type associated with the given key, where key and type
|
||||
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// TypeForKey will traverse the inheritance chain to get the correct value.
|
||||
func (t Tag) TypeForKey(key string) string {
|
||||
if start, end, _ := t.findTypeForKey(key); end != start {
|
||||
return t.str[start:end]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
var (
	// errPrivateUse is returned by SetTypeForKey when the receiver is a
	// private use tag.
	errPrivateUse = errors.New("cannot set a key on a private use tag")
	// errInvalidArguments is returned by SetTypeForKey when the key or the
	// value has an unsupported length.
	errInvalidArguments = errors.New("invalid key or type")
)
|
||||
|
||||
// SetTypeForKey returns a new Tag with the key set to type, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// An empty value removes an existing pair with the same key.
//
// It returns errPrivateUse for a private use tag and errInvalidArguments if
// key is not two bytes long or a non-empty value is not 3 to 8 bytes long.
// On error the receiver is returned unchanged.
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
	if t.IsPrivateUse() {
		return t, errPrivateUse
	}
	if len(key) != 2 {
		return t, errInvalidArguments
	}

	// Remove the setting if value is "".
	if value == "" {
		start, end, _ := t.findTypeForKey(key)
		if start != end {
			// Remove key tag and leading '-'.
			start -= 4

			// Remove a possible empty extension.
			// This applies when the removed pair is followed by nothing or by
			// a one-byte subtag, and is directly preceded by a one-byte subtag.
			if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' {
				start -= 2
			}
			if start == int(t.pVariant) && end == len(t.str) {
				// Nothing but the core is left: drop the string altogether.
				t.str = ""
				t.pVariant, t.pExt = 0, 0
			} else {
				t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
			}
		}
		return t, nil
	}

	if len(value) < 3 || len(value) > 8 {
		return t, errInvalidArguments
	}

	var (
		// buf has room for the largest core plus one -u key-type pair.
		buf    [maxCoreSize + maxSimpleUExtensionSize]byte
		uStart int // start of the -u extension.
	)

	// Generate the tag string if needed.
	if t.str == "" {
		uStart = t.genCoreBytes(buf[:])
		buf[uStart] = '-'
		uStart++
	}

	// Create new key-type pair and parse it to verify.
	b := buf[uStart:]
	copy(b, "u-")
	copy(b[2:], key)
	b[4] = '-'
	b = b[:5+copy(b[5:], value)]
	scan := makeScanner(b)
	if parseExtensions(&scan); scan.err != nil {
		return t, scan.err
	}

	// Assemble the replacement string.
	if t.str == "" {
		// Both offsets point at the '-' that precedes the new -u extension.
		t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
		t.str = string(buf[:uStart+len(b)])
	} else {
		s := t.str
		start, end, hasExt := t.findTypeForKey(key)
		if start == end {
			// Key not present: insert the pair at the reported position,
			// without the "u-" prefix if a -u extension already exists.
			if hasExt {
				b = b[2:]
			}
			t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
		} else {
			// Key present: replace only its type.
			t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
		}
	}
	return t, nil
}
|
||||
|
||||
// findKeyAndType returns the start and end position for the type corresponding
|
||||
// to key or the point at which to insert the key-value pair if the type
|
||||
// wasn't found. The hasExt return value reports whether an -u extension was present.
|
||||
// Note: the extensions are typically very small and are likely to contain
|
||||
// only one key-type pair.
|
||||
func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
|
||||
p := int(t.pExt)
|
||||
if len(key) != 2 || p == len(t.str) || p == 0 {
|
||||
return p, p, false
|
||||
}
|
||||
s := t.str
|
||||
|
||||
// Find the correct extension.
|
||||
for p++; s[p] != 'u'; p++ {
|
||||
if s[p] > 'u' {
|
||||
p--
|
||||
return p, p, false
|
||||
}
|
||||
if p = nextExtension(s, p); p == len(s) {
|
||||
return len(s), len(s), false
|
||||
}
|
||||
}
|
||||
// Proceed to the hyphen following the extension name.
|
||||
p++
|
||||
|
||||
// curKey is the key currently being processed.
|
||||
curKey := ""
|
||||
|
||||
// Iterate over keys until we get the end of a section.
|
||||
for {
|
||||
// p points to the hyphen preceding the current token.
|
||||
if p3 := p + 3; s[p3] == '-' {
|
||||
// Found a key.
|
||||
// Check whether we just processed the key that was requested.
|
||||
if curKey == key {
|
||||
return start, p, true
|
||||
}
|
||||
// Set to the next key and continue scanning type tokens.
|
||||
curKey = s[p+1 : p3]
|
||||
if curKey > key {
|
||||
return p, p, true
|
||||
}
|
||||
// Start of the type token sequence.
|
||||
start = p + 4
|
||||
// A type is at least 3 characters long.
|
||||
p += 7 // 4 + 3
|
||||
} else {
|
||||
// Attribute or type, which is at least 3 characters long.
|
||||
p += 4
|
||||
}
|
||||
// p points past the third character of a type or attribute.
|
||||
max := p + 5 // maximum length of token plus hyphen.
|
||||
if len(s) < max {
|
||||
max = len(s)
|
||||
}
|
||||
for ; p < max && s[p] != '-'; p++ {
|
||||
}
|
||||
// Bail if we have exhausted all tokens or if the next token starts
|
||||
// a new extension.
|
||||
if p == len(s) || s[p+2] == '-' {
|
||||
if curKey == key {
|
||||
return start, p, true
|
||||
}
|
||||
return p, p, true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ParseBase parses a 2- or 3-letter ISO 639 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown language identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseBase(s string) (Language, error) {
|
||||
if n := len(s); n < 2 || 3 < n {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
var buf [3]byte
|
||||
return getLangID(buf[:copy(buf[:], s)])
|
||||
}
|
||||
|
||||
// ParseScript parses a 4-letter ISO 15924 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown script identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseScript(s string) (Script, error) {
|
||||
if len(s) != 4 {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
var buf [4]byte
|
||||
return getScriptID(script, buf[:copy(buf[:], s)])
|
||||
}
|
||||
|
||||
// EncodeM49 returns the Region for the given UN M.49 code.
|
||||
// It returns an error if r is not a valid code.
|
||||
func EncodeM49(r int) (Region, error) {
|
||||
return getRegionM49(r)
|
||||
}
|
||||
|
||||
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown region identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseRegion(s string) (Region, error) {
|
||||
if n := len(s); n < 2 || 3 < n {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
var buf [3]byte
|
||||
return getRegionID(buf[:copy(buf[:], s)])
|
||||
}
|
||||
|
||||
// IsCountry returns whether this region is a country or autonomous area. This
|
||||
// includes non-standard definitions from CLDR.
|
||||
func (r Region) IsCountry() bool {
|
||||
if r == 0 || r.IsGroup() || r.IsPrivateUse() && r != _XK {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// IsGroup returns whether this region defines a collection of regions. This
|
||||
// includes non-standard definitions from CLDR.
|
||||
func (r Region) IsGroup() bool {
|
||||
if r == 0 {
|
||||
return false
|
||||
}
|
||||
return int(regionInclusion[r]) < len(regionContainment)
|
||||
}
|
||||
|
||||
// Contains returns whether Region c is contained by Region r. It returns true
|
||||
// if c == r.
|
||||
func (r Region) Contains(c Region) bool {
|
||||
if r == c {
|
||||
return true
|
||||
}
|
||||
g := regionInclusion[r]
|
||||
if g >= nRegionGroups {
|
||||
return false
|
||||
}
|
||||
m := regionContainment[g]
|
||||
|
||||
d := regionInclusion[c]
|
||||
b := regionInclusionBits[d]
|
||||
|
||||
// A contained country may belong to multiple disjoint groups. Matching any
|
||||
// of these indicates containment. If the contained region is a group, it
|
||||
// must strictly be a subset.
|
||||
if d >= nRegionGroups {
|
||||
return b&m != 0
|
||||
}
|
||||
return b&^m == 0
|
||||
}
|
||||
|
||||
var errNoTLD = errors.New("language: region is not a valid ccTLD")
|
||||
|
||||
// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
|
||||
// In all other cases it returns either the region itself or an error.
|
||||
//
|
||||
// This method may return an error for a region for which there exists a
|
||||
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
|
||||
// region will already be canonicalized it was obtained from a Tag that was
|
||||
// obtained using any of the default methods.
|
||||
func (r Region) TLD() (Region, error) {
|
||||
// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
|
||||
// difference between ISO 3166-1 and IANA ccTLD.
|
||||
if r == _GB {
|
||||
r = _UK
|
||||
}
|
||||
if (r.typ() & ccTLD) == 0 {
|
||||
return 0, errNoTLD
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// Canonicalize returns the region or a possible replacement if the region is
|
||||
// deprecated. It will not return a replacement for deprecated regions that
|
||||
// are split into multiple regions.
|
||||
func (r Region) Canonicalize() Region {
|
||||
if cr := normRegion(r); cr != 0 {
|
||||
return cr
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// Variant represents a registered variant of a language as defined by BCP 47.
|
||||
type Variant struct {
|
||||
ID uint8
|
||||
str string
|
||||
}
|
||||
|
||||
// ParseVariant parses and returns a Variant. An error is returned if s is not
|
||||
// a valid variant.
|
||||
func ParseVariant(s string) (Variant, error) {
|
||||
s = strings.ToLower(s)
|
||||
if id, ok := variantIndex[s]; ok {
|
||||
return Variant{id, s}, nil
|
||||
}
|
||||
return Variant{}, NewValueError([]byte(s))
|
||||
}
|
||||
|
||||
// String returns the string representation of the variant.
|
||||
func (v Variant) String() string {
|
||||
return v.str
|
||||
}
|
@ -0,0 +1,412 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/text/internal/tag"
|
||||
)
|
||||
|
||||
// findIndex tries to find the given tag in idx and returns a standardized error
|
||||
// if it could not be found.
|
||||
func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
|
||||
if !tag.FixCase(form, key) {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
i := idx.Index(key)
|
||||
if i == -1 {
|
||||
return 0, NewValueError(key)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
// searchUint returns the smallest index i for which imap[i] >= key, or
// len(imap) if there is none. imap must be sorted in ascending order.
func searchUint(imap []uint16, key uint16) int {
	atOrAbove := func(i int) bool { return key <= imap[i] }
	return sort.Search(len(imap), atOrAbove)
}
|
||||
|
||||
// Language is a compact identifier for a language subtag. The zero value
// stands for the undetermined language ("und"). Values at or above
// langNoIndexOffset encode a 3-letter code directly (see strToInt and
// intToStr); smaller non-zero values index the lang table.
type Language uint16
|
||||
|
||||
// getLangID returns the langID of s if s is a canonical subtag
|
||||
// or langUnknown if s is not a canonical subtag.
|
||||
func getLangID(s []byte) (Language, error) {
|
||||
if len(s) == 2 {
|
||||
return getLangISO2(s)
|
||||
}
|
||||
return getLangISO3(s)
|
||||
}
|
||||
|
||||
// TODO language normalization as well as the AliasMaps could be moved to the
|
||||
// higher level package, but it is a bit tricky to separate the generation.
|
||||
|
||||
func (id Language) Canonicalize() (Language, AliasType) {
|
||||
return normLang(id)
|
||||
}
|
||||
|
||||
// mapLang returns the mapped langID of id according to mapping m.
|
||||
func normLang(id Language) (Language, AliasType) {
|
||||
k := sort.Search(len(AliasMap), func(i int) bool {
|
||||
return AliasMap[i].From >= uint16(id)
|
||||
})
|
||||
if k < len(AliasMap) && AliasMap[k].From == uint16(id) {
|
||||
return Language(AliasMap[k].To), AliasTypes[k]
|
||||
}
|
||||
return id, AliasTypeUnknown
|
||||
}
|
||||
|
||||
// getLangISO2 returns the langID for the given 2-letter ISO language code
|
||||
// or unknownLang if this does not exist.
|
||||
func getLangISO2(s []byte) (Language, error) {
|
||||
if !tag.FixCase("zz", s) {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {
|
||||
return Language(i), nil
|
||||
}
|
||||
return 0, NewValueError(s)
|
||||
}
|
||||
|
||||
// base is the radix used by strToInt and intToStr: one digit per letter of
// the lower-case ASCII alphabet.
const base = 'z' - 'a' + 1

// strToInt interprets s as a number in radix base, most significant digit
// first, where 'a' is the digit 0. An empty s yields 0.
func strToInt(s []byte) uint {
	var v uint
	for _, c := range s {
		v = v*base + uint(c-'a')
	}
	return v
}

// converts the given integer to the original ASCII string passed to strToInt.
// len(s) must match the number of characters obtained.
func intToStr(v uint, s []byte) {
	// Fill s from the back: the least significant digit goes last.
	for i := len(s); i > 0; i-- {
		s[i-1] = 'a' + byte(v%base)
		v /= base
	}
}
|
||||
|
||||
// getLangISO3 returns the langID for the given 3-letter ISO language code
|
||||
// or unknownLang if this does not exist.
|
||||
func getLangISO3(s []byte) (Language, error) {
|
||||
if tag.FixCase("und", s) {
|
||||
// first try to match canonical 3-letter entries
|
||||
for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) {
|
||||
if e := lang.Elem(i); e[3] == 0 && e[2] == s[2] {
|
||||
// We treat "und" as special and always translate it to "unspecified".
|
||||
// Note that ZZ and Zzzz are private use and are not treated as
|
||||
// unspecified by default.
|
||||
id := Language(i)
|
||||
if id == nonCanonicalUnd {
|
||||
return 0, nil
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
}
|
||||
if i := altLangISO3.Index(s); i != -1 {
|
||||
return Language(altLangIndex[altLangISO3.Elem(i)[3]]), nil
|
||||
}
|
||||
n := strToInt(s)
|
||||
if langNoIndex[n/8]&(1<<(n%8)) != 0 {
|
||||
return Language(n) + langNoIndexOffset, nil
|
||||
}
|
||||
// Check for non-canonical uses of ISO3.
|
||||
for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) {
|
||||
if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
|
||||
return Language(i), nil
|
||||
}
|
||||
}
|
||||
return 0, NewValueError(s)
|
||||
}
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
|
||||
// StringToBuf writes the string to b and returns the number of bytes
|
||||
// written. cap(b) must be >= 3.
|
||||
func (id Language) StringToBuf(b []byte) int {
|
||||
if id >= langNoIndexOffset {
|
||||
intToStr(uint(id)-langNoIndexOffset, b[:3])
|
||||
return 3
|
||||
} else if id == 0 {
|
||||
return copy(b, "und")
|
||||
}
|
||||
l := lang[id<<2:]
|
||||
if l[3] == 0 {
|
||||
return copy(b, l[:3])
|
||||
}
|
||||
return copy(b, l[:2])
|
||||
}
|
||||
|
||||
// String returns the BCP 47 representation of the langID.
|
||||
// Use b as variable name, instead of id, to ensure the variable
|
||||
// used is consistent with that of Base in which this type is embedded.
|
||||
func (b Language) String() string {
|
||||
if b == 0 {
|
||||
return "und"
|
||||
} else if b >= langNoIndexOffset {
|
||||
b -= langNoIndexOffset
|
||||
buf := [3]byte{}
|
||||
intToStr(uint(b), buf[:])
|
||||
return string(buf[:])
|
||||
}
|
||||
l := lang.Elem(int(b))
|
||||
if l[3] == 0 {
|
||||
return l[:3]
|
||||
}
|
||||
return l[:2]
|
||||
}
|
||||
|
||||
// ISO3 returns the ISO 639-3 language code.
|
||||
func (b Language) ISO3() string {
|
||||
if b == 0 || b >= langNoIndexOffset {
|
||||
return b.String()
|
||||
}
|
||||
l := lang.Elem(int(b))
|
||||
if l[3] == 0 {
|
||||
return l[:3]
|
||||
} else if l[2] == 0 {
|
||||
return altLangISO3.Elem(int(l[3]))[:3]
|
||||
}
|
||||
// This allocation will only happen for 3-letter ISO codes
|
||||
// that are non-canonical BCP 47 language identifiers.
|
||||
return l[0:1] + l[2:4]
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether this language code is reserved for private use.
|
||||
func (b Language) IsPrivateUse() bool {
|
||||
return langPrivateStart <= b && b <= langPrivateEnd
|
||||
}
|
||||
|
||||
// SuppressScript returns the script marked as SuppressScript in the IANA
|
||||
// language tag repository, or 0 if there is no such script.
|
||||
func (b Language) SuppressScript() Script {
|
||||
if b < langNoIndexOffset {
|
||||
return Script(suppressScript[b])
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Region is a compact identifier for a region subtag. The zero value stands
// for the unspecified region ("ZZ"). Non-zero values below isoRegionOffset
// are rendered as a 3-digit UN M.49 code; values at or above it index the
// regionISO table after subtracting isoRegionOffset.
type Region uint16
|
||||
|
||||
// getRegionID returns the region id for s if s is a valid 2-letter region code
|
||||
// or unknownRegion.
|
||||
func getRegionID(s []byte) (Region, error) {
|
||||
if len(s) == 3 {
|
||||
if isAlpha(s[0]) {
|
||||
return getRegionISO3(s)
|
||||
}
|
||||
if i, err := strconv.ParseUint(string(s), 10, 10); err == nil {
|
||||
return getRegionM49(int(i))
|
||||
}
|
||||
}
|
||||
return getRegionISO2(s)
|
||||
}
|
||||
|
||||
// getRegionISO2 returns the regionID for the given 2-letter ISO country code
|
||||
// or unknownRegion if this does not exist.
|
||||
func getRegionISO2(s []byte) (Region, error) {
|
||||
i, err := findIndex(regionISO, s, "ZZ")
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return Region(i) + isoRegionOffset, nil
|
||||
}
|
||||
|
||||
// getRegionISO3 returns the regionID for the given 3-letter ISO country code
|
||||
// or unknownRegion if this does not exist.
|
||||
func getRegionISO3(s []byte) (Region, error) {
|
||||
if tag.FixCase("ZZZ", s) {
|
||||
for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) {
|
||||
if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] {
|
||||
return Region(i) + isoRegionOffset, nil
|
||||
}
|
||||
}
|
||||
for i := 0; i < len(altRegionISO3); i += 3 {
|
||||
if tag.Compare(altRegionISO3[i:i+3], s) == 0 {
|
||||
return Region(altRegionIDs[i/3]), nil
|
||||
}
|
||||
}
|
||||
return 0, NewValueError(s)
|
||||
}
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
|
||||
func getRegionM49(n int) (Region, error) {
|
||||
if 0 < n && n <= 999 {
|
||||
const (
|
||||
searchBits = 7
|
||||
regionBits = 9
|
||||
regionMask = 1<<regionBits - 1
|
||||
)
|
||||
idx := n >> searchBits
|
||||
buf := fromM49[m49Index[idx]:m49Index[idx+1]]
|
||||
val := uint16(n) << regionBits // we rely on bits shifting out
|
||||
i := sort.Search(len(buf), func(i int) bool {
|
||||
return buf[i] >= val
|
||||
})
|
||||
if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {
|
||||
return Region(r & regionMask), nil
|
||||
}
|
||||
}
|
||||
var e ValueError
|
||||
fmt.Fprint(bytes.NewBuffer([]byte(e.v[:])), n)
|
||||
return 0, e
|
||||
}
|
||||
|
||||
// normRegion returns a region if r is deprecated or 0 otherwise.
|
||||
// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).
|
||||
// TODO: consider mapping split up regions to new most populous one (like CLDR).
|
||||
func normRegion(r Region) Region {
|
||||
m := regionOldMap
|
||||
k := sort.Search(len(m), func(i int) bool {
|
||||
return m[i].From >= uint16(r)
|
||||
})
|
||||
if k < len(m) && m[k].From == uint16(r) {
|
||||
return Region(m[k].To)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Bit flags tested against the value returned by Region.typ.
const (
	iso3166UserAssigned = 1 << 0 // checked by Region.IsPrivateUse
	ccTLD               = 1 << 1 // checked by Region.TLD
	bcp47Region         = 1 << 2
)
|
||||
|
||||
func (r Region) typ() byte {
|
||||
return regionTypes[r]
|
||||
}
|
||||
|
||||
// String returns the BCP 47 representation for the region.
|
||||
// It returns "ZZ" for an unspecified region.
|
||||
func (r Region) String() string {
|
||||
if r < isoRegionOffset {
|
||||
if r == 0 {
|
||||
return "ZZ"
|
||||
}
|
||||
return fmt.Sprintf("%03d", r.M49())
|
||||
}
|
||||
r -= isoRegionOffset
|
||||
return regionISO.Elem(int(r))[:2]
|
||||
}
|
||||
|
||||
// ISO3 returns the 3-letter ISO code of r.
|
||||
// Note that not all regions have a 3-letter ISO code.
|
||||
// In such cases this method returns "ZZZ".
|
||||
func (r Region) ISO3() string {
|
||||
if r < isoRegionOffset {
|
||||
return "ZZZ"
|
||||
}
|
||||
r -= isoRegionOffset
|
||||
reg := regionISO.Elem(int(r))
|
||||
switch reg[2] {
|
||||
case 0:
|
||||
return altRegionISO3[reg[3]:][:3]
|
||||
case ' ':
|
||||
return "ZZZ"
|
||||
}
|
||||
return reg[0:1] + reg[2:4]
|
||||
}
|
||||
|
||||
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
|
||||
// is not defined for r.
|
||||
func (r Region) M49() int {
|
||||
return int(m49[r])
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
|
||||
// may include private-use tags that are assigned by CLDR and used in this
|
||||
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
|
||||
func (r Region) IsPrivateUse() bool {
|
||||
return r.typ()&iso3166UserAssigned != 0
|
||||
}
|
||||
|
||||
// Script is a compact identifier for a script subtag. The zero value stands
// for the unspecified script ("Zzzz"); other values index the script table.
type Script uint8
|
||||
|
||||
// getScriptID returns the script id for string s. It assumes that s
|
||||
// is of the format [A-Z][a-z]{3}.
|
||||
func getScriptID(idx tag.Index, s []byte) (Script, error) {
|
||||
i, err := findIndex(idx, s, "Zzzz")
|
||||
return Script(i), err
|
||||
}
|
||||
|
||||
// String returns the script code in title case.
|
||||
// It returns "Zzzz" for an unspecified script.
|
||||
func (s Script) String() string {
|
||||
if s == 0 {
|
||||
return "Zzzz"
|
||||
}
|
||||
return script.Elem(int(s))
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether this script code is reserved for private use.
|
||||
func (s Script) IsPrivateUse() bool {
|
||||
return _Qaaa <= s && s <= _Qabx
|
||||
}
|
||||
|
||||
// maxAltTaglen is the length of the longest key in grandfatheredMap.
const maxAltTaglen = len("en-US-POSIX")

// maxLen is the array length used for the keys of grandfatheredMap.
const maxLen = maxAltTaglen
|
||||
|
||||
var (
|
||||
// grandfatheredMap holds a mapping from legacy and grandfathered tags to
|
||||
// their base language or index to more elaborate tag.
|
||||
grandfatheredMap = map[[maxLen]byte]int16{
|
||||
[maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban
|
||||
[maxLen]byte{'i', '-', 'a', 'm', 'i'}: _ami, // i-ami
|
||||
[maxLen]byte{'i', '-', 'b', 'n', 'n'}: _bnn, // i-bnn
|
||||
[maxLen]byte{'i', '-', 'h', 'a', 'k'}: _hak, // i-hak
|
||||
[maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}: _tlh, // i-klingon
|
||||
[maxLen]byte{'i', '-', 'l', 'u', 'x'}: _lb, // i-lux
|
||||
[maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}: _nv, // i-navajo
|
||||
[maxLen]byte{'i', '-', 'p', 'w', 'n'}: _pwn, // i-pwn
|
||||
[maxLen]byte{'i', '-', 't', 'a', 'o'}: _tao, // i-tao
|
||||
[maxLen]byte{'i', '-', 't', 'a', 'y'}: _tay, // i-tay
|
||||
[maxLen]byte{'i', '-', 't', 's', 'u'}: _tsu, // i-tsu
|
||||
[maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}: _nb, // no-bok
|
||||
[maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}: _nn, // no-nyn
|
||||
[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}: _sfb, // sgn-BE-FR
|
||||
[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}: _vgt, // sgn-BE-NL
|
||||
[maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}: _sgg, // sgn-CH-DE
|
||||
[maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}: _cmn, // zh-guoyu
|
||||
[maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}: _hak, // zh-hakka
|
||||
[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan
|
||||
[maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}: _hsn, // zh-xiang
|
||||
|
||||
// Grandfathered tags with no modern replacement will be converted as
|
||||
// follows:
|
||||
[maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish
|
||||
[maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}: -2, // en-GB-oed
|
||||
[maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}: -3, // i-default
|
||||
[maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}: -4, // i-enochian
|
||||
[maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}: -5, // i-mingo
|
||||
[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}: -6, // zh-min
|
||||
|
||||
// CLDR-specific tag.
|
||||
[maxLen]byte{'r', 'o', 'o', 't'}: 0, // root
|
||||
[maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX"
|
||||
}
|
||||
|
||||
altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102}
|
||||
|
||||
altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix"
|
||||
)
|
||||
|
||||
func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {
|
||||
if v, ok := grandfatheredMap[s]; ok {
|
||||
if v < 0 {
|
||||
return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true
|
||||
}
|
||||
t.LangID = Language(v)
|
||||
return t, true
|
||||
}
|
||||
return t, false
|
||||
}
|
@ -0,0 +1,226 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import "errors"
|
||||
|
||||
// scriptRegionFlags is a set of the bit flags isList, scriptInFrom and
// regionInFrom declared below.
// NOTE(review): presumably the type of the flags field of the likely* table
// entries; confirm against the generated tables.
type scriptRegionFlags uint8
|
||||
|
||||
// Flags found in the flags field of the likely-subtags table entries.
const (
	isList       = 1 << 0 // the entry refers to a range in a *List table
	scriptInFrom = 1 << 1
	regionInFrom = 1 << 2
)
|
||||
|
||||
func (t *Tag) setUndefinedLang(id Language) {
|
||||
if t.LangID == 0 {
|
||||
t.LangID = id
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tag) setUndefinedScript(id Script) {
|
||||
if t.ScriptID == 0 {
|
||||
t.ScriptID = id
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tag) setUndefinedRegion(id Region) {
|
||||
if t.RegionID == 0 || t.RegionID.Contains(id) {
|
||||
t.RegionID = id
|
||||
}
|
||||
}
|
||||
|
||||
// ErrMissingLikelyTagsData is returned by addTags when there is no data from
// which the likely values of the missing subtags can be computed.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
|
||||
|
||||
// addLikelySubtags sets subtags to their most likely value, given the locale.
|
||||
// In most cases this means setting fields for unknown values, but in some
|
||||
// cases it may alter a value. It returns an ErrMissingLikelyTagsData error
|
||||
// if the given locale cannot be expanded.
|
||||
func (t Tag) addLikelySubtags() (Tag, error) {
|
||||
id, err := addTags(t)
|
||||
if err != nil {
|
||||
return t, err
|
||||
} else if id.equalTags(t) {
|
||||
return t, nil
|
||||
}
|
||||
id.RemakeString()
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// specializeRegion attempts to specialize a group region.
|
||||
func specializeRegion(t *Tag) bool {
|
||||
if i := regionInclusion[t.RegionID]; i < nRegionGroups {
|
||||
x := likelyRegionGroup[i]
|
||||
if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
|
||||
t.RegionID = Region(x.region)
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Maximize returns a new tag with missing tags filled in.
|
||||
func (t Tag) Maximize() (Tag, error) {
|
||||
return addTags(t)
|
||||
}
|
||||
|
||||
func addTags(t Tag) (Tag, error) {
|
||||
// We leave private use identifiers alone.
|
||||
if t.IsPrivateUse() {
|
||||
return t, nil
|
||||
}
|
||||
if t.ScriptID != 0 && t.RegionID != 0 {
|
||||
if t.LangID != 0 {
|
||||
// already fully specified
|
||||
specializeRegion(&t)
|
||||
return t, nil
|
||||
}
|
||||
// Search matches for und-script-region. Note that for these cases
|
||||
// region will never be a group so there is no need to check for this.
|
||||
list := likelyRegion[t.RegionID : t.RegionID+1]
|
||||
if x := list[0]; x.flags&isList != 0 {
|
||||
list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
|
||||
}
|
||||
for _, x := range list {
|
||||
// Deviating from the spec. See match_test.go for details.
|
||||
if Script(x.script) == t.ScriptID {
|
||||
t.setUndefinedLang(Language(x.lang))
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
if t.LangID != 0 {
|
||||
// Search matches for lang-script and lang-region, where lang != und.
|
||||
if t.LangID < langNoIndexOffset {
|
||||
x := likelyLang[t.LangID]
|
||||
if x.flags&isList != 0 {
|
||||
list := likelyLangList[x.region : x.region+uint16(x.script)]
|
||||
if t.ScriptID != 0 {
|
||||
for _, x := range list {
|
||||
if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
|
||||
t.setUndefinedRegion(Region(x.region))
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
} else if t.RegionID != 0 {
|
||||
count := 0
|
||||
goodScript := true
|
||||
tt := t
|
||||
for _, x := range list {
|
||||
// We visit all entries for which the script was not
|
||||
// defined, including the ones where the region was not
|
||||
// defined. This allows for proper disambiguation within
|
||||
// regions.
|
||||
if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
|
||||
tt.RegionID = Region(x.region)
|
||||
tt.setUndefinedScript(Script(x.script))
|
||||
goodScript = goodScript && tt.ScriptID == Script(x.script)
|
||||
count++
|
||||
}
|
||||
}
|
||||
if count == 1 {
|
||||
return tt, nil
|
||||
}
|
||||
// Even if we fail to find a unique Region, we might have
|
||||
// an unambiguous script.
|
||||
if goodScript {
|
||||
t.ScriptID = tt.ScriptID
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Search matches for und-script.
|
||||
if t.ScriptID != 0 {
|
||||
x := likelyScript[t.ScriptID]
|
||||
if x.region != 0 {
|
||||
t.setUndefinedRegion(Region(x.region))
|
||||
t.setUndefinedLang(Language(x.lang))
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
// Search matches for und-region. If und-script-region exists, it would
|
||||
// have been found earlier.
|
||||
if t.RegionID != 0 {
|
||||
if i := regionInclusion[t.RegionID]; i < nRegionGroups {
|
||||
x := likelyRegionGroup[i]
|
||||
if x.region != 0 {
|
||||
t.setUndefinedLang(Language(x.lang))
|
||||
t.setUndefinedScript(Script(x.script))
|
||||
t.RegionID = Region(x.region)
|
||||
}
|
||||
} else {
|
||||
x := likelyRegion[t.RegionID]
|
||||
if x.flags&isList != 0 {
|
||||
x = likelyRegionList[x.lang]
|
||||
}
|
||||
if x.script != 0 && x.flags != scriptInFrom {
|
||||
t.setUndefinedLang(Language(x.lang))
|
||||
t.setUndefinedScript(Script(x.script))
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Search matches for lang.
|
||||
if t.LangID < langNoIndexOffset {
|
||||
x := likelyLang[t.LangID]
|
||||
if x.flags&isList != 0 {
|
||||
x = likelyLangList[x.region]
|
||||
}
|
||||
if x.region != 0 {
|
||||
t.setUndefinedScript(Script(x.script))
|
||||
t.setUndefinedRegion(Region(x.region))
|
||||
}
|
||||
specializeRegion(&t)
|
||||
if t.LangID == 0 {
|
||||
t.LangID = _en // default language
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
return t, ErrMissingLikelyTagsData
|
||||
}
|
||||
|
||||
func (t *Tag) setTagsFrom(id Tag) {
|
||||
t.LangID = id.LangID
|
||||
t.ScriptID = id.ScriptID
|
||||
t.RegionID = id.RegionID
|
||||
}
|
||||
|
||||
// minimize removes the region or script subtags from t such that
|
||||
// t.addLikelySubtags() == t.minimize().addLikelySubtags().
|
||||
func (t Tag) minimize() (Tag, error) {
|
||||
t, err := minimizeTags(t)
|
||||
if err != nil {
|
||||
return t, err
|
||||
}
|
||||
t.RemakeString()
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// minimizeTags mimics the behavior of the ICU 51 C implementation.
|
||||
func minimizeTags(t Tag) (Tag, error) {
|
||||
if t.equalTags(Und) {
|
||||
return t, nil
|
||||
}
|
||||
max, err := addTags(t)
|
||||
if err != nil {
|
||||
return t, err
|
||||
}
|
||||
for _, id := range [...]Tag{
|
||||
{LangID: t.LangID},
|
||||
{LangID: t.LangID, RegionID: t.RegionID},
|
||||
{LangID: t.LangID, ScriptID: t.ScriptID},
|
||||
} {
|
||||
if x, err := addTags(id); err == nil && max.equalTags(x) {
|
||||
t.setTagsFrom(id)
|
||||
break
|
||||
}
|
||||
}
|
||||
return t, nil
|
||||
}
|
@ -0,0 +1,594 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"golang.org/x/text/internal/tag"
|
||||
)
|
||||
|
||||
// isAlpha returns true if the byte is not a digit.
// b must be an ASCII letter or digit; the check relies on every ASCII letter
// sorting after '9'.
func isAlpha(b byte) bool {
	return '9' < b
}
|
||||
|
||||
// isAlphaNum returns true if the string contains only ASCII letters or digits.
// It returns true for empty input.
func isAlphaNum(s []byte) bool {
	for _, c := range s {
		switch {
		case 'a' <= c && c <= 'z':
		case 'A' <= c && c <= 'Z':
		case '0' <= c && c <= '9':
		default:
			return false
		}
	}
	return true
}
|
||||
|
||||
var (
	// ErrSyntax is returned by any of the parsing functions when the
	// input is not well-formed, according to BCP 47.
	// TODO: return the position at which the syntax error occurred?
	ErrSyntax = errors.New("language: tag is not well-formed")

	// ErrDuplicateKey is returned when a tag contains the same key twice with
	// different values in the -u section.
	ErrDuplicateKey = errors.New("language: different values for same key in -u extension")
)
|
||||
|
||||
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError struct {
	v [8]byte // offending subtag, zero-padded
}

// NewValueError creates a new ValueError. At most the first 8 bytes of tag
// are kept.
func NewValueError(tag []byte) ValueError {
	var e ValueError
	copy(e.v[:], tag)
	return e
}

// tag returns the stored subtag up to, but not including, the first zero
// byte, or all 8 bytes if there is none.
func (e ValueError) tag() []byte {
	if n := bytes.IndexByte(e.v[:], 0); n != -1 {
		return e.v[:n]
	}
	return e.v[:]
}

// Error implements the error interface.
func (e ValueError) Error() string {
	return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
}

// Subtag returns the subtag for which the error occurred.
func (e ValueError) Subtag() string {
	return string(e.tag())
}
|
||||
|
||||
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
|
||||
type scanner struct {
|
||||
b []byte
|
||||
bytes [max99thPercentileSize]byte
|
||||
token []byte
|
||||
start int // start position of the current token
|
||||
end int // end position of the current token
|
||||
next int // next point for scan
|
||||
err error
|
||||
done bool
|
||||
}
|
||||
|
||||
func makeScannerString(s string) scanner {
|
||||
scan := scanner{}
|
||||
if len(s) <= len(scan.bytes) {
|
||||
scan.b = scan.bytes[:copy(scan.bytes[:], s)]
|
||||
} else {
|
||||
scan.b = []byte(s)
|
||||
}
|
||||
scan.init()
|
||||
return scan
|
||||
}
|
||||
|
||||
// makeScanner returns a scanner using b as the input buffer.
|
||||
// b is not copied and may be modified by the scanner routines.
|
||||
func makeScanner(b []byte) scanner {
|
||||
scan := scanner{b: b}
|
||||
scan.init()
|
||||
return scan
|
||||
}
|
||||
|
||||
func (s *scanner) init() {
|
||||
for i, c := range s.b {
|
||||
if c == '_' {
|
||||
s.b[i] = '-'
|
||||
}
|
||||
}
|
||||
s.scan()
|
||||
}
|
||||
|
||||
// restToLower converts the string between start and end to lower case.
|
||||
func (s *scanner) toLower(start, end int) {
|
||||
for i := start; i < end; i++ {
|
||||
c := s.b[i]
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
s.b[i] += 'a' - 'A'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *scanner) setError(e error) {
|
||||
if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) {
|
||||
s.err = e
|
||||
}
|
||||
}
|
||||
|
||||
// resizeRange shrinks or grows the array at position oldStart such that
|
||||
// a new string of size newSize can fit between oldStart and oldEnd.
|
||||
// Sets the scan point to after the resized range.
|
||||
func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
|
||||
s.start = oldStart
|
||||
if end := oldStart + newSize; end != oldEnd {
|
||||
diff := end - oldEnd
|
||||
if end < cap(s.b) {
|
||||
b := make([]byte, len(s.b)+diff)
|
||||
copy(b, s.b[:oldStart])
|
||||
copy(b[end:], s.b[oldEnd:])
|
||||
s.b = b
|
||||
} else {
|
||||
s.b = append(s.b[end:], s.b[oldEnd:]...)
|
||||
}
|
||||
s.next = end + (s.next - s.end)
|
||||
s.end = end
|
||||
}
|
||||
}
|
||||
|
||||
// replace replaces the current token with repl.
|
||||
func (s *scanner) replace(repl string) {
|
||||
s.resizeRange(s.start, s.end, len(repl))
|
||||
copy(s.b[s.start:], repl)
|
||||
}
|
||||
|
||||
// gobble removes the current token from the input.
|
||||
// Caller must call scan after calling gobble.
|
||||
func (s *scanner) gobble(e error) {
|
||||
s.setError(e)
|
||||
if s.start == 0 {
|
||||
s.b = s.b[:+copy(s.b, s.b[s.next:])]
|
||||
s.end = 0
|
||||
} else {
|
||||
s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
|
||||
s.end = s.start - 1
|
||||
}
|
||||
s.next = s.start
|
||||
}
|
||||
|
||||
// deleteRange removes the given range from s.b before the current token.
|
||||
func (s *scanner) deleteRange(start, end int) {
|
||||
s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
|
||||
diff := end - start
|
||||
s.next -= diff
|
||||
s.start -= diff
|
||||
s.end -= diff
|
||||
}
|
||||
|
||||
// scan parses the next token of a BCP 47 string. Tokens that are larger
|
||||
// than 8 characters or include non-alphanumeric characters result in an error
|
||||
// and are gobbled and removed from the output.
|
||||
// It returns the end position of the last token consumed.
|
||||
func (s *scanner) scan() (end int) {
|
||||
end = s.end
|
||||
s.token = nil
|
||||
for s.start = s.next; s.next < len(s.b); {
|
||||
i := bytes.IndexByte(s.b[s.next:], '-')
|
||||
if i == -1 {
|
||||
s.end = len(s.b)
|
||||
s.next = len(s.b)
|
||||
i = s.end - s.start
|
||||
} else {
|
||||
s.end = s.next + i
|
||||
s.next = s.end + 1
|
||||
}
|
||||
token := s.b[s.start:s.end]
|
||||
if i < 1 || i > 8 || !isAlphaNum(token) {
|
||||
s.gobble(ErrSyntax)
|
||||
continue
|
||||
}
|
||||
s.token = token
|
||||
return end
|
||||
}
|
||||
if n := len(s.b); n > 0 && s.b[n-1] == '-' {
|
||||
s.setError(ErrSyntax)
|
||||
s.b = s.b[:len(s.b)-1]
|
||||
}
|
||||
s.done = true
|
||||
return end
|
||||
}
|
||||
|
||||
// acceptMinSize parses multiple tokens of the given size or greater.
|
||||
// It returns the end position of the last token consumed.
|
||||
func (s *scanner) acceptMinSize(min int) (end int) {
|
||||
end = s.end
|
||||
s.scan()
|
||||
for ; len(s.token) >= min; s.scan() {
|
||||
end = s.end
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
||||
// failed it returns an error and any part of the tag that could be parsed.
|
||||
// If parsing succeeded but an unknown value was found, it returns
|
||||
// ValueError. The Tag returned in this case is just stripped of the unknown
|
||||
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
||||
// and extensions to this standard defined in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
func Parse(s string) (t Tag, err error) {
|
||||
// TODO: consider supporting old-style locale key-value pairs.
|
||||
if s == "" {
|
||||
return Und, ErrSyntax
|
||||
}
|
||||
if len(s) <= maxAltTaglen {
|
||||
b := [maxAltTaglen]byte{}
|
||||
for i, c := range s {
|
||||
// Generating invalid UTF-8 is okay as it won't match.
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
c += 'a' - 'A'
|
||||
} else if c == '_' {
|
||||
c = '-'
|
||||
}
|
||||
b[i] = byte(c)
|
||||
}
|
||||
if t, ok := grandfathered(b); ok {
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
scan := makeScannerString(s)
|
||||
return parse(&scan, s)
|
||||
}
|
||||
|
||||
func parse(scan *scanner, s string) (t Tag, err error) {
|
||||
t = Und
|
||||
var end int
|
||||
if n := len(scan.token); n <= 1 {
|
||||
scan.toLower(0, len(scan.b))
|
||||
if n == 0 || scan.token[0] != 'x' {
|
||||
return t, ErrSyntax
|
||||
}
|
||||
end = parseExtensions(scan)
|
||||
} else if n >= 4 {
|
||||
return Und, ErrSyntax
|
||||
} else { // the usual case
|
||||
t, end = parseTag(scan)
|
||||
if n := len(scan.token); n == 1 {
|
||||
t.pExt = uint16(end)
|
||||
end = parseExtensions(scan)
|
||||
} else if end < len(scan.b) {
|
||||
scan.setError(ErrSyntax)
|
||||
scan.b = scan.b[:end]
|
||||
}
|
||||
}
|
||||
if int(t.pVariant) < len(scan.b) {
|
||||
if end < len(s) {
|
||||
s = s[:end]
|
||||
}
|
||||
if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
|
||||
t.str = s
|
||||
} else {
|
||||
t.str = string(scan.b)
|
||||
}
|
||||
} else {
|
||||
t.pVariant, t.pExt = 0, 0
|
||||
}
|
||||
return t, scan.err
|
||||
}
|
||||
|
||||
// parseTag parses language, script, region and variants.
|
||||
// It returns a Tag and the end position in the input that was parsed.
|
||||
func parseTag(scan *scanner) (t Tag, end int) {
|
||||
var e error
|
||||
// TODO: set an error if an unknown lang, script or region is encountered.
|
||||
t.LangID, e = getLangID(scan.token)
|
||||
scan.setError(e)
|
||||
scan.replace(t.LangID.String())
|
||||
langStart := scan.start
|
||||
end = scan.scan()
|
||||
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
|
||||
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
|
||||
// to a tag of the form <extlang>.
|
||||
lang, e := getLangID(scan.token)
|
||||
if lang != 0 {
|
||||
t.LangID = lang
|
||||
copy(scan.b[langStart:], lang.String())
|
||||
scan.b[langStart+3] = '-'
|
||||
scan.start = langStart + 4
|
||||
}
|
||||
scan.gobble(e)
|
||||
end = scan.scan()
|
||||
}
|
||||
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
|
||||
t.ScriptID, e = getScriptID(script, scan.token)
|
||||
if t.ScriptID == 0 {
|
||||
scan.gobble(e)
|
||||
}
|
||||
end = scan.scan()
|
||||
}
|
||||
if n := len(scan.token); n >= 2 && n <= 3 {
|
||||
t.RegionID, e = getRegionID(scan.token)
|
||||
if t.RegionID == 0 {
|
||||
scan.gobble(e)
|
||||
} else {
|
||||
scan.replace(t.RegionID.String())
|
||||
}
|
||||
end = scan.scan()
|
||||
}
|
||||
scan.toLower(scan.start, len(scan.b))
|
||||
t.pVariant = byte(end)
|
||||
end = parseVariants(scan, end, t)
|
||||
t.pExt = uint16(end)
|
||||
return t, end
|
||||
}
|
||||
|
||||
var separator = []byte{'-'}
|
||||
|
||||
// parseVariants scans tokens as long as each token is a valid variant string.
|
||||
// Duplicate variants are removed.
|
||||
func parseVariants(scan *scanner, end int, t Tag) int {
|
||||
start := scan.start
|
||||
varIDBuf := [4]uint8{}
|
||||
variantBuf := [4][]byte{}
|
||||
varID := varIDBuf[:0]
|
||||
variant := variantBuf[:0]
|
||||
last := -1
|
||||
needSort := false
|
||||
for ; len(scan.token) >= 4; scan.scan() {
|
||||
// TODO: measure the impact of needing this conversion and redesign
|
||||
// the data structure if there is an issue.
|
||||
v, ok := variantIndex[string(scan.token)]
|
||||
if !ok {
|
||||
// unknown variant
|
||||
// TODO: allow user-defined variants?
|
||||
scan.gobble(NewValueError(scan.token))
|
||||
continue
|
||||
}
|
||||
varID = append(varID, v)
|
||||
variant = append(variant, scan.token)
|
||||
if !needSort {
|
||||
if last < int(v) {
|
||||
last = int(v)
|
||||
} else {
|
||||
needSort = true
|
||||
// There is no legal combinations of more than 7 variants
|
||||
// (and this is by no means a useful sequence).
|
||||
const maxVariants = 8
|
||||
if len(varID) > maxVariants {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
end = scan.end
|
||||
}
|
||||
if needSort {
|
||||
sort.Sort(variantsSort{varID, variant})
|
||||
k, l := 0, -1
|
||||
for i, v := range varID {
|
||||
w := int(v)
|
||||
if l == w {
|
||||
// Remove duplicates.
|
||||
continue
|
||||
}
|
||||
varID[k] = varID[i]
|
||||
variant[k] = variant[i]
|
||||
k++
|
||||
l = w
|
||||
}
|
||||
if str := bytes.Join(variant[:k], separator); len(str) == 0 {
|
||||
end = start - 1
|
||||
} else {
|
||||
scan.resizeRange(start, end, len(str))
|
||||
copy(scan.b[scan.start:], str)
|
||||
end = scan.end
|
||||
}
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// variantsSort sorts two parallel slices by the values of i; v is permuted
// alongside. It provides the Len, Swap and Less methods used with sort.Sort.
type variantsSort struct {
	i []uint8  // sort keys
	v [][]byte // values kept in step with i
}

// Len returns the number of keys.
func (s variantsSort) Len() int {
	n := len(s.i)
	return n
}

// Swap exchanges the entries at positions i and j in both slices.
func (s variantsSort) Swap(i, j int) {
	key, val := s.i[i], s.v[i]
	s.i[i], s.v[i] = s.i[j], s.v[j]
	s.i[j], s.v[j] = key, val
}

// Less reports whether the key at position i sorts before the key at j.
func (s variantsSort) Less(i, j int) bool {
	return s.i[j] > s.i[i]
}
|
||||
|
||||
// bytesSort orders byte slices by their first n bytes. It provides the Len,
// Swap and Less methods used with the sort package. Every slice must hold at
// least n bytes, unless it differs from the one it is compared with at an
// earlier position.
type bytesSort struct {
	b [][]byte
	n int // first n bytes to compare
}

// Len returns the number of slices.
func (b bytesSort) Len() int {
	count := len(b.b)
	return count
}

// Swap exchanges the slices at positions i and j.
func (b bytesSort) Swap(i, j int) {
	tmp := b.b[i]
	b.b[i] = b.b[j]
	b.b[j] = tmp
}

// Less reports whether slice i sorts before slice j, looking only at the
// first n bytes. Slices that agree on those bytes are not less than each
// other.
func (b bytesSort) Less(i, j int) bool {
	for k := 0; k < b.n; k++ {
		if x, y := b.b[i][k], b.b[j][k]; x != y {
			return x < y
		}
	}
	return false
}
|
||||
|
||||
// parseExtensions parses and normalizes the extensions in the buffer.
|
||||
// It returns the last position of scan.b that is part of any extension.
|
||||
// It also trims scan.b to remove excess parts accordingly.
|
||||
func parseExtensions(scan *scanner) int {
|
||||
start := scan.start
|
||||
exts := [][]byte{}
|
||||
private := []byte{}
|
||||
end := scan.end
|
||||
for len(scan.token) == 1 {
|
||||
extStart := scan.start
|
||||
ext := scan.token[0]
|
||||
end = parseExtension(scan)
|
||||
extension := scan.b[extStart:end]
|
||||
if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
|
||||
scan.setError(ErrSyntax)
|
||||
end = extStart
|
||||
continue
|
||||
} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
|
||||
scan.b = scan.b[:end]
|
||||
return end
|
||||
} else if ext == 'x' {
|
||||
private = extension
|
||||
break
|
||||
}
|
||||
exts = append(exts, extension)
|
||||
}
|
||||
sort.Sort(bytesSort{exts, 1})
|
||||
if len(private) > 0 {
|
||||
exts = append(exts, private)
|
||||
}
|
||||
scan.b = scan.b[:start]
|
||||
if len(exts) > 0 {
|
||||
scan.b = append(scan.b, bytes.Join(exts, separator)...)
|
||||
} else if start > 0 {
|
||||
// Strip trailing '-'.
|
||||
scan.b = scan.b[:start-1]
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// parseExtension parses a single extension and returns the position of
|
||||
// the extension end.
|
||||
func parseExtension(scan *scanner) int {
|
||||
start, end := scan.start, scan.end
|
||||
switch scan.token[0] {
|
||||
case 'u':
|
||||
attrStart := end
|
||||
scan.scan()
|
||||
for last := []byte{}; len(scan.token) > 2; scan.scan() {
|
||||
if bytes.Compare(scan.token, last) != -1 {
|
||||
// Attributes are unsorted. Start over from scratch.
|
||||
p := attrStart + 1
|
||||
scan.next = p
|
||||
attrs := [][]byte{}
|
||||
for scan.scan(); len(scan.token) > 2; scan.scan() {
|
||||
attrs = append(attrs, scan.token)
|
||||
end = scan.end
|
||||
}
|
||||
sort.Sort(bytesSort{attrs, 3})
|
||||
copy(scan.b[p:], bytes.Join(attrs, separator))
|
||||
break
|
||||
}
|
||||
last = scan.token
|
||||
end = scan.end
|
||||
}
|
||||
var last, key []byte
|
||||
for attrEnd := end; len(scan.token) == 2; last = key {
|
||||
key = scan.token
|
||||
keyEnd := scan.end
|
||||
end = scan.acceptMinSize(3)
|
||||
// TODO: check key value validity
|
||||
if keyEnd == end || bytes.Compare(key, last) != 1 {
|
||||
// We have an invalid key or the keys are not sorted.
|
||||
// Start scanning keys from scratch and reorder.
|
||||
p := attrEnd + 1
|
||||
scan.next = p
|
||||
keys := [][]byte{}
|
||||
for scan.scan(); len(scan.token) == 2; {
|
||||
keyStart, keyEnd := scan.start, scan.end
|
||||
end = scan.acceptMinSize(3)
|
||||
if keyEnd != end {
|
||||
keys = append(keys, scan.b[keyStart:end])
|
||||
} else {
|
||||
scan.setError(ErrSyntax)
|
||||
end = keyStart
|
||||
}
|
||||
}
|
||||
sort.Stable(bytesSort{keys, 2})
|
||||
if n := len(keys); n > 0 {
|
||||
k := 0
|
||||
for i := 1; i < n; i++ {
|
||||
if !bytes.Equal(keys[k][:2], keys[i][:2]) {
|
||||
k++
|
||||
keys[k] = keys[i]
|
||||
} else if !bytes.Equal(keys[k], keys[i]) {
|
||||
scan.setError(ErrDuplicateKey)
|
||||
}
|
||||
}
|
||||
keys = keys[:k+1]
|
||||
}
|
||||
reordered := bytes.Join(keys, separator)
|
||||
if e := p + len(reordered); e < end {
|
||||
scan.deleteRange(e, end)
|
||||
end = e
|
||||
}
|
||||
copy(scan.b[p:], reordered)
|
||||
break
|
||||
}
|
||||
}
|
||||
case 't':
|
||||
scan.scan()
|
||||
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
|
||||
_, end = parseTag(scan)
|
||||
scan.toLower(start, end)
|
||||
}
|
||||
for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
|
||||
end = scan.acceptMinSize(3)
|
||||
}
|
||||
case 'x':
|
||||
end = scan.acceptMinSize(1)
|
||||
default:
|
||||
end = scan.acceptMinSize(2)
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// getExtension returns the name, body and end position of the extension.
|
||||
func getExtension(s string, p int) (end int, ext string) {
|
||||
if s[p] == '-' {
|
||||
p++
|
||||
}
|
||||
if s[p] == 'x' {
|
||||
return len(s), s[p:]
|
||||
}
|
||||
end = nextExtension(s, p)
|
||||
return end, s[p:end]
|
||||
}
|
||||
|
||||
// nextExtension finds the next extension within the string, searching
// for the -<char>- pattern from position p. It returns the position of the
// pattern's first '-', or len(s) if the pattern does not occur.
// In the vast majority of cases, language tags will have at most
// one extension and extensions tend to be small.
func nextExtension(s string, p int) int {
	limit := len(s) - 3
	for p < limit {
		if s[p] != '-' {
			p++
			continue
		}
		if s[p+2] == '-' {
			return p
		}
		// Not a singleton: skip the '-' and the two bytes that follow it.
		p += 3
	}
	return len(s)
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,48 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
|
||||
// It simplifies safe initialization of Tag values.
|
||||
func MustParse(s string) Tag {
|
||||
t, err := Parse(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
|
||||
// It simplifies safe initialization of Base values.
|
||||
func MustParseBase(s string) Language {
|
||||
b, err := ParseBase(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// MustParseScript is like ParseScript, but panics if the given script cannot be
|
||||
// parsed. It simplifies safe initialization of Script values.
|
||||
func MustParseScript(s string) Script {
|
||||
scr, err := ParseScript(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return scr
|
||||
}
|
||||
|
||||
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
|
||||
// parsed. It simplifies safe initialization of Region values.
|
||||
func MustParseRegion(s string) Region {
|
||||
r, err := ParseRegion(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// Und is the root language.
|
||||
var Und Tag
|
@ -0,0 +1,67 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package internal
|
||||
|
||||
// This file contains matchers that implement CLDR inheritance.
|
||||
//
|
||||
// See https://unicode.org/reports/tr35/#Locale_Inheritance.
|
||||
//
|
||||
// Some of the inheritance described in this document is already handled by
|
||||
// the cldr package.
|
||||
|
||||
import (
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// TODO: consider if (some of the) matching algorithm needs to be public after
|
||||
// getting some feel about what is generic and what is specific.
|
||||
|
||||
// NewInheritanceMatcher returns a matcher that matches based on the inheritance
|
||||
// chain.
|
||||
//
|
||||
// The matcher uses canonicalization and the parent relationship to find a
|
||||
// match. The resulting match will always be either Und or a language with the
|
||||
// same language and script as the requested language. It will not match
|
||||
// languages for which there is understood to be mutual or one-directional
|
||||
// intelligibility.
|
||||
//
|
||||
// A Match will indicate an Exact match if the language matches after
|
||||
// canonicalization and High if the matched tag is a parent.
|
||||
func NewInheritanceMatcher(t []language.Tag) *InheritanceMatcher {
|
||||
tags := &InheritanceMatcher{make(map[language.Tag]int)}
|
||||
for i, tag := range t {
|
||||
ct, err := language.All.Canonicalize(tag)
|
||||
if err != nil {
|
||||
ct = tag
|
||||
}
|
||||
tags.index[ct] = i
|
||||
}
|
||||
return tags
|
||||
}
|
||||
|
||||
type InheritanceMatcher struct {
|
||||
index map[language.Tag]int
|
||||
}
|
||||
|
||||
func (m InheritanceMatcher) Match(want ...language.Tag) (language.Tag, int, language.Confidence) {
|
||||
for _, t := range want {
|
||||
ct, err := language.All.Canonicalize(t)
|
||||
if err != nil {
|
||||
ct = t
|
||||
}
|
||||
conf := language.Exact
|
||||
for {
|
||||
if index, ok := m.index[ct]; ok {
|
||||
return ct, index, conf
|
||||
}
|
||||
if ct == language.Und {
|
||||
break
|
||||
}
|
||||
ct = ct.Parent()
|
||||
conf = language.High
|
||||
}
|
||||
}
|
||||
return language.Und, 0, language.No
|
||||
}
|
@ -0,0 +1,55 @@
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package number
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal/language/compact"
|
||||
)
|
||||
|
||||
// A system identifies a CLDR numbering system.
|
||||
type system byte
|
||||
|
||||
type systemData struct {
|
||||
id system
|
||||
digitSize byte // number of UTF-8 bytes per digit
|
||||
zero [utf8.UTFMax]byte // UTF-8 sequence of zero digit.
|
||||
}
|
||||
|
||||
// A SymbolType identifies a symbol of a specific kind.
|
||||
type SymbolType int
|
||||
|
||||
const (
|
||||
SymDecimal SymbolType = iota
|
||||
SymGroup
|
||||
SymList
|
||||
SymPercentSign
|
||||
SymPlusSign
|
||||
SymMinusSign
|
||||
SymExponential
|
||||
SymSuperscriptingExponent
|
||||
SymPerMille
|
||||
SymInfinity
|
||||
SymNan
|
||||
SymTimeSeparator
|
||||
|
||||
NumSymbolTypes
|
||||
)
|
||||
|
||||
const hasNonLatnMask = 0x8000
|
||||
|
||||
// symOffset is an offset into altSymData if the bit indicated by hasNonLatnMask
|
||||
// is not 0 (with this bit masked out), and an offset into symIndex otherwise.
|
||||
//
|
||||
// TODO: this type can be a byte again if we use an indirection into altsymData
|
||||
// and introduce an alt -> offset slice (the length of this will be number of
|
||||
// alternatives plus 1). This also allows getting rid of the compactTag field
|
||||
// in altSymData. In total this will save about 1K.
|
||||
type symOffset uint16
|
||||
|
||||
type altSymData struct {
|
||||
compactTag compact.ID
|
||||
symIndex symOffset
|
||||
system system
|
||||
}
|
@ -0,0 +1,498 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate stringer -type RoundingMode
|
||||
|
||||
package number
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// RoundingMode determines how a number is rounded to the desired precision.
type RoundingMode byte

const (
	// ToNearestEven rounds towards the nearest integer, or towards an even
	// number if equidistant.
	ToNearestEven RoundingMode = iota
	// ToNearestZero rounds towards the nearest integer, or towards zero if
	// equidistant.
	ToNearestZero
	// ToNearestAway rounds towards the nearest integer, or away from zero if
	// equidistant.
	ToNearestAway
	// ToPositiveInf rounds towards infinity.
	ToPositiveInf
	// ToNegativeInf rounds towards negative infinity.
	ToNegativeInf
	// ToZero rounds towards zero.
	ToZero
	// AwayFromZero rounds away from zero.
	AwayFromZero
	// numModes follows the last rounding mode; its value is the number of
	// modes declared above.
	numModes
)
|
||||
|
||||
// maxIntDigits is the size of the digit buffer embedded in a Decimal.
const maxIntDigits = 20

// A Decimal represents a floating point number in decimal format.
// Digits represents a number [0, 1.0), and the absolute value represented by
// Decimal is Digits * 10^Exp. Leading and trailing zeros may be omitted and Exp
// may point outside a valid position in Digits.
//
// Examples:
//
//	Number     Decimal
//	12345      Digits: [1, 2, 3, 4, 5], Exp: 5
//	12.345     Digits: [1, 2, 3, 4, 5], Exp: 2
//	12000      Digits: [1, 2],          Exp: 5
//	12000.00   Digits: [1, 2],          Exp: 5
//	0.00123    Digits: [1, 2, 3],       Exp: -2
//	0          Digits: [],              Exp: 0
type Decimal struct {
	digits

	// buf is built-in storage that clear points Digits at when no digit
	// slice has been set yet.
	buf [maxIntDigits]byte
}

// digits holds the number representation shared by Decimal and Digits.
type digits struct {
	Digits []byte // mantissa digits, big-endian
	Exp    int32  // exponent
	Neg    bool
	Inf    bool // Takes precedence over Digits and Exp.
	NaN    bool // Takes precedence over Inf.
}
|
||||
|
||||
// Digits represents a floating point number represented in digits of the
|
||||
// base in which a number is to be displayed. It is similar to Decimal, but
|
||||
// keeps track of trailing fraction zeros and the comma placement for
|
||||
// engineering notation. Digits must have at least one digit.
|
||||
//
|
||||
// Examples:
|
||||
// Number Decimal
|
||||
// decimal
|
||||
// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 End: 5
|
||||
// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 End: 5
|
||||
// 12000 Digits: [1, 2], Exp: 5 End: 5
|
||||
// 12000.00 Digits: [1, 2], Exp: 5 End: 7
|
||||
// 0.00123 Digits: [1, 2, 3], Exp: -2 End: 3
|
||||
// 0 Digits: [], Exp: 0 End: 1
|
||||
// scientific (actual exp is Exp - Comma)
|
||||
// 0e0 Digits: [0], Exp: 1, End: 1, Comma: 1
|
||||
// .0e0 Digits: [0], Exp: 0, End: 1, Comma: 0
|
||||
// 0.0e0 Digits: [0], Exp: 1, End: 2, Comma: 1
|
||||
// 1.23e4 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 1
|
||||
// .123e5 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 0
|
||||
// engineering
|
||||
// 12.3e3 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 2
|
||||
type Digits struct {
|
||||
digits
|
||||
// End indicates the end position of the number.
|
||||
End int32 // For decimals Exp <= End. For scientific len(Digits) <= End.
|
||||
// Comma is used for the comma position for scientific (always 0 or 1) and
|
||||
// engineering notation (always 0, 1, 2, or 3).
|
||||
Comma uint8
|
||||
// IsScientific indicates whether this number is to be rendered as a
|
||||
// scientific number.
|
||||
IsScientific bool
|
||||
}
|
||||
|
||||
func (d *Digits) NumFracDigits() int {
|
||||
if d.Exp >= d.End {
|
||||
return 0
|
||||
}
|
||||
return int(d.End - d.Exp)
|
||||
}
|
||||
|
||||
// normalize returns a new Decimal with leading and trailing zeros removed.
|
||||
func (d *Decimal) normalize() (n Decimal) {
|
||||
n = *d
|
||||
b := n.Digits
|
||||
// Strip leading zeros. Resulting number of digits is significant digits.
|
||||
for len(b) > 0 && b[0] == 0 {
|
||||
b = b[1:]
|
||||
n.Exp--
|
||||
}
|
||||
// Strip trailing zeros
|
||||
for len(b) > 0 && b[len(b)-1] == 0 {
|
||||
b = b[:len(b)-1]
|
||||
}
|
||||
if len(b) == 0 {
|
||||
n.Exp = 0
|
||||
}
|
||||
n.Digits = b
|
||||
return n
|
||||
}
|
||||
|
||||
func (d *Decimal) clear() {
|
||||
b := d.Digits
|
||||
if b == nil {
|
||||
b = d.buf[:0]
|
||||
}
|
||||
*d = Decimal{}
|
||||
d.Digits = b[:0]
|
||||
}
|
||||
|
||||
func (x *Decimal) String() string {
|
||||
if x.NaN {
|
||||
return "NaN"
|
||||
}
|
||||
var buf []byte
|
||||
if x.Neg {
|
||||
buf = append(buf, '-')
|
||||
}
|
||||
if x.Inf {
|
||||
buf = append(buf, "Inf"...)
|
||||
return string(buf)
|
||||
}
|
||||
switch {
|
||||
case len(x.Digits) == 0:
|
||||
buf = append(buf, '0')
|
||||
case x.Exp <= 0:
|
||||
// 0.00ddd
|
||||
buf = append(buf, "0."...)
|
||||
buf = appendZeros(buf, -int(x.Exp))
|
||||
buf = appendDigits(buf, x.Digits)
|
||||
|
||||
case /* 0 < */ int(x.Exp) < len(x.Digits):
|
||||
// dd.ddd
|
||||
buf = appendDigits(buf, x.Digits[:x.Exp])
|
||||
buf = append(buf, '.')
|
||||
buf = appendDigits(buf, x.Digits[x.Exp:])
|
||||
|
||||
default: // len(x.Digits) <= x.Exp
|
||||
// ddd00
|
||||
buf = appendDigits(buf, x.Digits)
|
||||
buf = appendZeros(buf, int(x.Exp)-len(x.Digits))
|
||||
}
|
||||
return string(buf)
|
||||
}
|
||||
|
||||
// appendDigits appends the ASCII character for each digit value in digits to
// buf and returns the extended buffer.
func appendDigits(buf []byte, digits []byte) []byte {
	for i := range digits {
		buf = append(buf, '0'+digits[i])
	}
	return buf
}
|
||||
|
||||
// appendZeros appends n '0' characters to buf and returns buf.
// Nothing is appended if n is zero or negative.
func appendZeros(buf []byte, n int) []byte {
	for i := 0; i < n; i++ {
		buf = append(buf, '0')
	}
	return buf
}
|
||||
|
||||
func (d *digits) round(mode RoundingMode, n int) {
|
||||
if n >= len(d.Digits) {
|
||||
return
|
||||
}
|
||||
// Make rounding decision: The result mantissa is truncated ("rounded down")
|
||||
// by default. Decide if we need to increment, or "round up", the (unsigned)
|
||||
// mantissa.
|
||||
inc := false
|
||||
switch mode {
|
||||
case ToNegativeInf:
|
||||
inc = d.Neg
|
||||
case ToPositiveInf:
|
||||
inc = !d.Neg
|
||||
case ToZero:
|
||||
// nothing to do
|
||||
case AwayFromZero:
|
||||
inc = true
|
||||
case ToNearestEven:
|
||||
inc = d.Digits[n] > 5 || d.Digits[n] == 5 &&
|
||||
(len(d.Digits) > n+1 || n == 0 || d.Digits[n-1]&1 != 0)
|
||||
case ToNearestAway:
|
||||
inc = d.Digits[n] >= 5
|
||||
case ToNearestZero:
|
||||
inc = d.Digits[n] > 5 || d.Digits[n] == 5 && len(d.Digits) > n+1
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
if inc {
|
||||
d.roundUp(n)
|
||||
} else {
|
||||
d.roundDown(n)
|
||||
}
|
||||
}
|
||||
|
||||
// roundFloat rounds a floating point number.
|
||||
func (r RoundingMode) roundFloat(x float64) float64 {
|
||||
// Make rounding decision: The result mantissa is truncated ("rounded down")
|
||||
// by default. Decide if we need to increment, or "round up", the (unsigned)
|
||||
// mantissa.
|
||||
abs := x
|
||||
if x < 0 {
|
||||
abs = -x
|
||||
}
|
||||
i, f := math.Modf(abs)
|
||||
if f == 0.0 {
|
||||
return x
|
||||
}
|
||||
inc := false
|
||||
switch r {
|
||||
case ToNegativeInf:
|
||||
inc = x < 0
|
||||
case ToPositiveInf:
|
||||
inc = x >= 0
|
||||
case ToZero:
|
||||
// nothing to do
|
||||
case AwayFromZero:
|
||||
inc = true
|
||||
case ToNearestEven:
|
||||
// TODO: check overflow
|
||||
inc = f > 0.5 || f == 0.5 && int64(i)&1 != 0
|
||||
case ToNearestAway:
|
||||
inc = f >= 0.5
|
||||
case ToNearestZero:
|
||||
inc = f > 0.5
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
if inc {
|
||||
i += 1
|
||||
}
|
||||
if abs != x {
|
||||
i = -i
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
func (x *digits) roundUp(n int) {
|
||||
if n < 0 || n >= len(x.Digits) {
|
||||
return // nothing to do
|
||||
}
|
||||
// find first digit < 9
|
||||
for n > 0 && x.Digits[n-1] >= 9 {
|
||||
n--
|
||||
}
|
||||
|
||||
if n == 0 {
|
||||
// all digits are 9s => round up to 1 and update exponent
|
||||
x.Digits[0] = 1 // ok since len(x.Digits) > n
|
||||
x.Digits = x.Digits[:1]
|
||||
x.Exp++
|
||||
return
|
||||
}
|
||||
x.Digits[n-1]++
|
||||
x.Digits = x.Digits[:n]
|
||||
// x already trimmed
|
||||
}
|
||||
|
||||
func (x *digits) roundDown(n int) {
|
||||
if n < 0 || n >= len(x.Digits) {
|
||||
return // nothing to do
|
||||
}
|
||||
x.Digits = x.Digits[:n]
|
||||
trim(x)
|
||||
}
|
||||
|
||||
// trim cuts off any trailing zeros from x's mantissa;
|
||||
// they are meaningless for the value of x.
|
||||
func trim(x *digits) {
|
||||
i := len(x.Digits)
|
||||
for i > 0 && x.Digits[i-1] == 0 {
|
||||
i--
|
||||
}
|
||||
x.Digits = x.Digits[:i]
|
||||
if i == 0 {
|
||||
x.Exp = 0
|
||||
}
|
||||
}
|
||||
|
||||
// A Converter converts a number into decimals according to the given rounding
|
||||
// criteria.
|
||||
type Converter interface {
|
||||
Convert(d *Decimal, r RoundingContext)
|
||||
}
|
||||
|
||||
// signed and unsigned name the two values of the signed argument of
// ConvertInt.
const (
	signed   = true
	unsigned = !signed
)
|
||||
|
||||
// Convert converts the given number to the decimal representation using the
|
||||
// supplied RoundingContext.
|
||||
func (d *Decimal) Convert(r RoundingContext, number interface{}) {
|
||||
switch f := number.(type) {
|
||||
case Converter:
|
||||
d.clear()
|
||||
f.Convert(d, r)
|
||||
case float32:
|
||||
d.ConvertFloat(r, float64(f), 32)
|
||||
case float64:
|
||||
d.ConvertFloat(r, f, 64)
|
||||
case int:
|
||||
d.ConvertInt(r, signed, uint64(f))
|
||||
case int8:
|
||||
d.ConvertInt(r, signed, uint64(f))
|
||||
case int16:
|
||||
d.ConvertInt(r, signed, uint64(f))
|
||||
case int32:
|
||||
d.ConvertInt(r, signed, uint64(f))
|
||||
case int64:
|
||||
d.ConvertInt(r, signed, uint64(f))
|
||||
case uint:
|
||||
d.ConvertInt(r, unsigned, uint64(f))
|
||||
case uint8:
|
||||
d.ConvertInt(r, unsigned, uint64(f))
|
||||
case uint16:
|
||||
d.ConvertInt(r, unsigned, uint64(f))
|
||||
case uint32:
|
||||
d.ConvertInt(r, unsigned, uint64(f))
|
||||
case uint64:
|
||||
d.ConvertInt(r, unsigned, f)
|
||||
|
||||
default:
|
||||
d.NaN = true
|
||||
// TODO:
|
||||
// case string: if produced by strconv, allows for easy arbitrary pos.
|
||||
// case reflect.Value:
|
||||
// case big.Float
|
||||
// case big.Int
|
||||
// case big.Rat?
|
||||
// catch underlyings using reflect or will this already be done by the
|
||||
// message package?
|
||||
}
|
||||
}
|
||||
|
||||
// ConvertInt converts an integer to decimals.
|
||||
func (d *Decimal) ConvertInt(r RoundingContext, signed bool, x uint64) {
|
||||
if r.Increment > 0 {
|
||||
// TODO: if uint64 is too large, fall back to float64
|
||||
if signed {
|
||||
d.ConvertFloat(r, float64(int64(x)), 64)
|
||||
} else {
|
||||
d.ConvertFloat(r, float64(x), 64)
|
||||
}
|
||||
return
|
||||
}
|
||||
d.clear()
|
||||
if signed && int64(x) < 0 {
|
||||
x = uint64(-int64(x))
|
||||
d.Neg = true
|
||||
}
|
||||
d.fillIntDigits(x)
|
||||
d.Exp = int32(len(d.Digits))
|
||||
}
|
||||
|
||||
// ConvertFloat converts a floating point number to decimals.
|
||||
func (d *Decimal) ConvertFloat(r RoundingContext, x float64, size int) {
|
||||
d.clear()
|
||||
if math.IsNaN(x) {
|
||||
d.NaN = true
|
||||
return
|
||||
}
|
||||
// Simple case: decimal notation
|
||||
if r.Increment > 0 {
|
||||
scale := int(r.IncrementScale)
|
||||
mult := 1.0
|
||||
if scale > len(scales) {
|
||||
mult = math.Pow(10, float64(scale))
|
||||
} else {
|
||||
mult = scales[scale]
|
||||
}
|
||||
// We multiply x instead of dividing inc as it gives less rounding
|
||||
// issues.
|
||||
x *= mult
|
||||
x /= float64(r.Increment)
|
||||
x = r.Mode.roundFloat(x)
|
||||
x *= float64(r.Increment)
|
||||
x /= mult
|
||||
}
|
||||
|
||||
abs := x
|
||||
if x < 0 {
|
||||
d.Neg = true
|
||||
abs = -x
|
||||
}
|
||||
if math.IsInf(abs, 1) {
|
||||
d.Inf = true
|
||||
return
|
||||
}
|
||||
|
||||
// By default we get the exact decimal representation.
|
||||
verb := byte('g')
|
||||
prec := -1
|
||||
// As the strconv API does not return the rounding accuracy, we can only
|
||||
// round using ToNearestEven.
|
||||
if r.Mode == ToNearestEven {
|
||||
if n := r.RoundSignificantDigits(); n >= 0 {
|
||||
prec = n
|
||||
} else if n = r.RoundFractionDigits(); n >= 0 {
|
||||
prec = n
|
||||
verb = 'f'
|
||||
}
|
||||
} else {
|
||||
// TODO: At this point strconv's rounding is imprecise to the point that
|
||||
// it is not useable for this purpose.
|
||||
// See https://github.com/golang/go/issues/21714
|
||||
// If rounding is requested, we ask for a large number of digits and
|
||||
// round from there to simulate rounding only once.
|
||||
// Ideally we would have strconv export an AppendDigits that would take
|
||||
// a rounding mode and/or return an accuracy. Something like this would
|
||||
// work:
|
||||
// AppendDigits(dst []byte, x float64, base, size, prec int) (digits []byte, exp, accuracy int)
|
||||
hasPrec := r.RoundSignificantDigits() >= 0
|
||||
hasScale := r.RoundFractionDigits() >= 0
|
||||
if hasPrec || hasScale {
|
||||
// prec is the number of mantissa bits plus some extra for safety.
|
||||
// We need at least the number of mantissa bits as decimals to
|
||||
// accurately represent the floating point without rounding, as each
|
||||
// bit requires one more decimal to represent: 0.5, 0.25, 0.125, ...
|
||||
prec = 60
|
||||
}
|
||||
}
|
||||
|
||||
b := strconv.AppendFloat(d.Digits[:0], abs, verb, prec, size)
|
||||
i := 0
|
||||
k := 0
|
||||
beforeDot := 1
|
||||
for i < len(b) {
|
||||
if c := b[i]; '0' <= c && c <= '9' {
|
||||
b[k] = c - '0'
|
||||
k++
|
||||
d.Exp += int32(beforeDot)
|
||||
} else if c == '.' {
|
||||
beforeDot = 0
|
||||
d.Exp = int32(k)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
i++
|
||||
}
|
||||
d.Digits = b[:k]
|
||||
if i != len(b) {
|
||||
i += len("e")
|
||||
pSign := i
|
||||
exp := 0
|
||||
for i++; i < len(b); i++ {
|
||||
exp *= 10
|
||||
exp += int(b[i] - '0')
|
||||
}
|
||||
if b[pSign] == '-' {
|
||||
exp = -exp
|
||||
}
|
||||
d.Exp = int32(exp) + 1
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Decimal) fillIntDigits(x uint64) {
|
||||
if cap(d.Digits) < maxIntDigits {
|
||||
d.Digits = d.buf[:]
|
||||
} else {
|
||||
d.Digits = d.buf[:maxIntDigits]
|
||||
}
|
||||
i := 0
|
||||
for ; x > 0; x /= 10 {
|
||||
d.Digits[i] = byte(x % 10)
|
||||
i++
|
||||
}
|
||||
d.Digits = d.Digits[:i]
|
||||
for p := 0; p < i; p++ {
|
||||
i--
|
||||
d.Digits[p], d.Digits[i] = d.Digits[i], d.Digits[p]
|
||||
}
|
||||
}
|
||||
|
||||
// scales holds successive powers of ten: scales[i] is the result of
// multiplying 1.0 by 10 i times. It is filled in once by init.
var scales [70]float64

func init() {
	for i, pow := 0, 1.0; i < len(scales); i, pow = i+1, pow*10 {
		scales[i] = pow
	}
}
|
@ -0,0 +1,535 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package number
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// TODO:
|
||||
// - grouping of fractions
|
||||
// - allow user-defined superscript notation (such as <sup>4</sup>)
|
||||
// - same for non-breaking spaces, like &nbsp;
|
||||
|
||||
// A VisibleDigits computes digits, comma placement and trailing zeros as they
|
||||
// will be shown to the user.
|
||||
type VisibleDigits interface {
|
||||
Digits(buf []byte, t language.Tag, scale int) Digits
|
||||
// TODO: Do we also need to add the verb or pass a format.State?
|
||||
}
|
||||
|
||||
// Formatting proceeds along the following lines:
|
||||
// 0) Compose rounding information from format and context.
|
||||
// 1) Convert a number into a Decimal.
|
||||
// 2) Sanitize Decimal by adding trailing zeros, removing leading digits, and
|
||||
// (non-increment) rounding. The Decimal that results from this is suitable
|
||||
// for determining the plural form.
|
||||
// 3) Render the Decimal in the localized form.
|
||||
|
||||
// Formatter contains all the information needed to render a number.
|
||||
type Formatter struct {
|
||||
Pattern
|
||||
Info
|
||||
}
|
||||
|
||||
func (f *Formatter) init(t language.Tag, index []uint8) {
|
||||
f.Info = InfoFromTag(t)
|
||||
f.Pattern = formats[index[tagToID(t)]]
|
||||
}
|
||||
|
||||
// InitPattern initializes a Formatter for the given Pattern.
|
||||
func (f *Formatter) InitPattern(t language.Tag, pat *Pattern) {
|
||||
f.Info = InfoFromTag(t)
|
||||
f.Pattern = *pat
|
||||
}
|
||||
|
||||
// InitDecimal initializes a Formatter using the default Pattern for the given
|
||||
// language.
|
||||
func (f *Formatter) InitDecimal(t language.Tag) {
|
||||
f.init(t, tagToDecimal)
|
||||
}
|
||||
|
||||
// InitScientific initializes a Formatter using the default Pattern for the
|
||||
// given language.
|
||||
func (f *Formatter) InitScientific(t language.Tag) {
|
||||
f.init(t, tagToScientific)
|
||||
f.Pattern.MinFractionDigits = 0
|
||||
f.Pattern.MaxFractionDigits = -1
|
||||
}
|
||||
|
||||
// InitEngineering initializes a Formatter using the default Pattern for the
|
||||
// given language.
|
||||
func (f *Formatter) InitEngineering(t language.Tag) {
|
||||
f.init(t, tagToScientific)
|
||||
f.Pattern.MinFractionDigits = 0
|
||||
f.Pattern.MaxFractionDigits = -1
|
||||
f.Pattern.MaxIntegerDigits = 3
|
||||
f.Pattern.MinIntegerDigits = 1
|
||||
}
|
||||
|
||||
// InitPercent initializes a Formatter using the default Pattern for the given
|
||||
// language.
|
||||
func (f *Formatter) InitPercent(t language.Tag) {
|
||||
f.init(t, tagToPercent)
|
||||
}
|
||||
|
||||
// InitPerMille initializes a Formatter using the default Pattern for the given
|
||||
// language.
|
||||
func (f *Formatter) InitPerMille(t language.Tag) {
|
||||
f.init(t, tagToPercent)
|
||||
f.Pattern.DigitShift = 3
|
||||
}
|
||||
|
||||
func (f *Formatter) Append(dst []byte, x interface{}) []byte {
|
||||
var d Decimal
|
||||
r := f.RoundingContext
|
||||
d.Convert(r, x)
|
||||
return f.Render(dst, FormatDigits(&d, r))
|
||||
}
|
||||
|
||||
func FormatDigits(d *Decimal, r RoundingContext) Digits {
|
||||
if r.isScientific() {
|
||||
return scientificVisibleDigits(r, d)
|
||||
}
|
||||
return decimalVisibleDigits(r, d)
|
||||
}
|
||||
|
||||
func (f *Formatter) Format(dst []byte, d *Decimal) []byte {
|
||||
return f.Render(dst, FormatDigits(d, f.RoundingContext))
|
||||
}
|
||||
|
||||
func (f *Formatter) Render(dst []byte, d Digits) []byte {
|
||||
var result []byte
|
||||
var postPrefix, preSuffix int
|
||||
if d.IsScientific {
|
||||
result, postPrefix, preSuffix = appendScientific(dst, f, &d)
|
||||
} else {
|
||||
result, postPrefix, preSuffix = appendDecimal(dst, f, &d)
|
||||
}
|
||||
if f.PadRune == 0 {
|
||||
return result
|
||||
}
|
||||
width := int(f.FormatWidth)
|
||||
if count := utf8.RuneCount(result); count < width {
|
||||
insertPos := 0
|
||||
switch f.Flags & PadMask {
|
||||
case PadAfterPrefix:
|
||||
insertPos = postPrefix
|
||||
case PadBeforeSuffix:
|
||||
insertPos = preSuffix
|
||||
case PadAfterSuffix:
|
||||
insertPos = len(result)
|
||||
}
|
||||
num := width - count
|
||||
pad := [utf8.UTFMax]byte{' '}
|
||||
sz := 1
|
||||
if r := f.PadRune; r != 0 {
|
||||
sz = utf8.EncodeRune(pad[:], r)
|
||||
}
|
||||
extra := sz * num
|
||||
if n := len(result) + extra; n < cap(result) {
|
||||
result = result[:n]
|
||||
copy(result[insertPos+extra:], result[insertPos:])
|
||||
} else {
|
||||
buf := make([]byte, n)
|
||||
copy(buf, result[:insertPos])
|
||||
copy(buf[insertPos+extra:], result[insertPos:])
|
||||
result = buf
|
||||
}
|
||||
for ; num > 0; num-- {
|
||||
insertPos += copy(result[insertPos:], pad[:sz])
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// decimalVisibleDigits converts d according to the RoundingContext. Note that
|
||||
// the exponent may change as a result of this operation.
|
||||
func decimalVisibleDigits(r RoundingContext, d *Decimal) Digits {
|
||||
if d.NaN || d.Inf {
|
||||
return Digits{digits: digits{Neg: d.Neg, NaN: d.NaN, Inf: d.Inf}}
|
||||
}
|
||||
n := Digits{digits: d.normalize().digits}
|
||||
|
||||
exp := n.Exp
|
||||
exp += int32(r.DigitShift)
|
||||
|
||||
// Cap integer digits. Remove *most-significant* digits.
|
||||
if r.MaxIntegerDigits > 0 {
|
||||
if p := int(exp) - int(r.MaxIntegerDigits); p > 0 {
|
||||
if p > len(n.Digits) {
|
||||
p = len(n.Digits)
|
||||
}
|
||||
if n.Digits = n.Digits[p:]; len(n.Digits) == 0 {
|
||||
exp = 0
|
||||
} else {
|
||||
exp -= int32(p)
|
||||
}
|
||||
// Strip leading zeros.
|
||||
for len(n.Digits) > 0 && n.Digits[0] == 0 {
|
||||
n.Digits = n.Digits[1:]
|
||||
exp--
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Rounding if not already done by Convert.
|
||||
p := len(n.Digits)
|
||||
if maxSig := int(r.MaxSignificantDigits); maxSig > 0 {
|
||||
p = maxSig
|
||||
}
|
||||
if maxFrac := int(r.MaxFractionDigits); maxFrac >= 0 {
|
||||
if cap := int(exp) + maxFrac; cap < p {
|
||||
p = int(exp) + maxFrac
|
||||
}
|
||||
if p < 0 {
|
||||
p = 0
|
||||
}
|
||||
}
|
||||
n.round(r.Mode, p)
|
||||
|
||||
// set End (trailing zeros)
|
||||
n.End = int32(len(n.Digits))
|
||||
if n.End == 0 {
|
||||
exp = 0
|
||||
if r.MinFractionDigits > 0 {
|
||||
n.End = int32(r.MinFractionDigits)
|
||||
}
|
||||
if p := int32(r.MinSignificantDigits) - 1; p > n.End {
|
||||
n.End = p
|
||||
}
|
||||
} else {
|
||||
if end := exp + int32(r.MinFractionDigits); end > n.End {
|
||||
n.End = end
|
||||
}
|
||||
if n.End < int32(r.MinSignificantDigits) {
|
||||
n.End = int32(r.MinSignificantDigits)
|
||||
}
|
||||
}
|
||||
n.Exp = exp
|
||||
return n
|
||||
}
|
||||
|
||||
// appendDecimal appends a formatted number to dst. It returns two possible
|
||||
// insertion points for padding.
|
||||
func appendDecimal(dst []byte, f *Formatter, n *Digits) (b []byte, postPre, preSuf int) {
|
||||
if dst, ok := f.renderSpecial(dst, n); ok {
|
||||
return dst, 0, len(dst)
|
||||
}
|
||||
digits := n.Digits
|
||||
exp := n.Exp
|
||||
|
||||
// Split in integer and fraction part.
|
||||
var intDigits, fracDigits []byte
|
||||
numInt := 0
|
||||
numFrac := int(n.End - n.Exp)
|
||||
if exp > 0 {
|
||||
numInt = int(exp)
|
||||
if int(exp) >= len(digits) { // ddddd | ddddd00
|
||||
intDigits = digits
|
||||
} else { // ddd.dd
|
||||
intDigits = digits[:exp]
|
||||
fracDigits = digits[exp:]
|
||||
}
|
||||
} else {
|
||||
fracDigits = digits
|
||||
}
|
||||
|
||||
neg := n.Neg
|
||||
affix, suffix := f.getAffixes(neg)
|
||||
dst = appendAffix(dst, f, affix, neg)
|
||||
savedLen := len(dst)
|
||||
|
||||
minInt := int(f.MinIntegerDigits)
|
||||
if minInt == 0 && f.MinSignificantDigits > 0 {
|
||||
minInt = 1
|
||||
}
|
||||
// add leading zeros
|
||||
for i := minInt; i > numInt; i-- {
|
||||
dst = f.AppendDigit(dst, 0)
|
||||
if f.needsSep(i) {
|
||||
dst = append(dst, f.Symbol(SymGroup)...)
|
||||
}
|
||||
}
|
||||
i := 0
|
||||
for ; i < len(intDigits); i++ {
|
||||
dst = f.AppendDigit(dst, intDigits[i])
|
||||
if f.needsSep(numInt - i) {
|
||||
dst = append(dst, f.Symbol(SymGroup)...)
|
||||
}
|
||||
}
|
||||
for ; i < numInt; i++ {
|
||||
dst = f.AppendDigit(dst, 0)
|
||||
if f.needsSep(numInt - i) {
|
||||
dst = append(dst, f.Symbol(SymGroup)...)
|
||||
}
|
||||
}
|
||||
|
||||
if numFrac > 0 || f.Flags&AlwaysDecimalSeparator != 0 {
|
||||
dst = append(dst, f.Symbol(SymDecimal)...)
|
||||
}
|
||||
// Add trailing zeros
|
||||
i = 0
|
||||
for n := -int(n.Exp); i < n; i++ {
|
||||
dst = f.AppendDigit(dst, 0)
|
||||
}
|
||||
for _, d := range fracDigits {
|
||||
i++
|
||||
dst = f.AppendDigit(dst, d)
|
||||
}
|
||||
for ; i < numFrac; i++ {
|
||||
dst = f.AppendDigit(dst, 0)
|
||||
}
|
||||
return appendAffix(dst, f, suffix, neg), savedLen, len(dst)
|
||||
}
|
||||
|
||||
func scientificVisibleDigits(r RoundingContext, d *Decimal) Digits {
|
||||
if d.NaN || d.Inf {
|
||||
return Digits{digits: digits{Neg: d.Neg, NaN: d.NaN, Inf: d.Inf}}
|
||||
}
|
||||
n := Digits{digits: d.normalize().digits, IsScientific: true}
|
||||
|
||||
// Normalize to have at least one digit. This simplifies engineering
|
||||
// notation.
|
||||
if len(n.Digits) == 0 {
|
||||
n.Digits = append(n.Digits, 0)
|
||||
n.Exp = 1
|
||||
}
|
||||
|
||||
// Significant digits are transformed by the parser for scientific notation
|
||||
// and do not need to be handled here.
|
||||
maxInt, numInt := int(r.MaxIntegerDigits), int(r.MinIntegerDigits)
|
||||
if numInt == 0 {
|
||||
numInt = 1
|
||||
}
|
||||
|
||||
// If a maximum number of integers is specified, the minimum must be 1
|
||||
// and the exponent is grouped by this number (e.g. for engineering)
|
||||
if maxInt > numInt {
|
||||
// Correct the exponent to reflect a single integer digit.
|
||||
numInt = 1
|
||||
// engineering
|
||||
// 0.01234 ([12345]e-1) -> 1.2345e-2 12.345e-3
|
||||
// 12345 ([12345]e+5) -> 1.2345e4 12.345e3
|
||||
d := int(n.Exp-1) % maxInt
|
||||
if d < 0 {
|
||||
d += maxInt
|
||||
}
|
||||
numInt += d
|
||||
}
|
||||
|
||||
p := len(n.Digits)
|
||||
if maxSig := int(r.MaxSignificantDigits); maxSig > 0 {
|
||||
p = maxSig
|
||||
}
|
||||
if maxFrac := int(r.MaxFractionDigits); maxFrac >= 0 && numInt+maxFrac < p {
|
||||
p = numInt + maxFrac
|
||||
}
|
||||
n.round(r.Mode, p)
|
||||
|
||||
n.Comma = uint8(numInt)
|
||||
n.End = int32(len(n.Digits))
|
||||
if minSig := int32(r.MinFractionDigits) + int32(numInt); n.End < minSig {
|
||||
n.End = minSig
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// appendScientific appends a formatted number to dst. It returns two possible
|
||||
// insertion points for padding.
|
||||
func appendScientific(dst []byte, f *Formatter, n *Digits) (b []byte, postPre, preSuf int) {
|
||||
if dst, ok := f.renderSpecial(dst, n); ok {
|
||||
return dst, 0, 0
|
||||
}
|
||||
digits := n.Digits
|
||||
numInt := int(n.Comma)
|
||||
numFrac := int(n.End) - int(n.Comma)
|
||||
|
||||
var intDigits, fracDigits []byte
|
||||
if numInt <= len(digits) {
|
||||
intDigits = digits[:numInt]
|
||||
fracDigits = digits[numInt:]
|
||||
} else {
|
||||
intDigits = digits
|
||||
}
|
||||
neg := n.Neg
|
||||
affix, suffix := f.getAffixes(neg)
|
||||
dst = appendAffix(dst, f, affix, neg)
|
||||
savedLen := len(dst)
|
||||
|
||||
i := 0
|
||||
for ; i < len(intDigits); i++ {
|
||||
dst = f.AppendDigit(dst, intDigits[i])
|
||||
if f.needsSep(numInt - i) {
|
||||
dst = append(dst, f.Symbol(SymGroup)...)
|
||||
}
|
||||
}
|
||||
for ; i < numInt; i++ {
|
||||
dst = f.AppendDigit(dst, 0)
|
||||
if f.needsSep(numInt - i) {
|
||||
dst = append(dst, f.Symbol(SymGroup)...)
|
||||
}
|
||||
}
|
||||
|
||||
if numFrac > 0 || f.Flags&AlwaysDecimalSeparator != 0 {
|
||||
dst = append(dst, f.Symbol(SymDecimal)...)
|
||||
}
|
||||
i = 0
|
||||
for ; i < len(fracDigits); i++ {
|
||||
dst = f.AppendDigit(dst, fracDigits[i])
|
||||
}
|
||||
for ; i < numFrac; i++ {
|
||||
dst = f.AppendDigit(dst, 0)
|
||||
}
|
||||
|
||||
// exp
|
||||
buf := [12]byte{}
|
||||
// TODO: use exponential if superscripting is not available (no Latin
|
||||
// numbers or no tags) and use exponential in all other cases.
|
||||
exp := n.Exp - int32(n.Comma)
|
||||
exponential := f.Symbol(SymExponential)
|
||||
if exponential == "E" {
|
||||
dst = append(dst, "\u202f"...) // NARROW NO-BREAK SPACE
|
||||
dst = append(dst, f.Symbol(SymSuperscriptingExponent)...)
|
||||
dst = append(dst, "\u202f"...) // NARROW NO-BREAK SPACE
|
||||
dst = f.AppendDigit(dst, 1)
|
||||
dst = f.AppendDigit(dst, 0)
|
||||
switch {
|
||||
case exp < 0:
|
||||
dst = append(dst, superMinus...)
|
||||
exp = -exp
|
||||
case f.Flags&AlwaysExpSign != 0:
|
||||
dst = append(dst, superPlus...)
|
||||
}
|
||||
b = strconv.AppendUint(buf[:0], uint64(exp), 10)
|
||||
for i := len(b); i < int(f.MinExponentDigits); i++ {
|
||||
dst = append(dst, superDigits[0]...)
|
||||
}
|
||||
for _, c := range b {
|
||||
dst = append(dst, superDigits[c-'0']...)
|
||||
}
|
||||
} else {
|
||||
dst = append(dst, exponential...)
|
||||
switch {
|
||||
case exp < 0:
|
||||
dst = append(dst, f.Symbol(SymMinusSign)...)
|
||||
exp = -exp
|
||||
case f.Flags&AlwaysExpSign != 0:
|
||||
dst = append(dst, f.Symbol(SymPlusSign)...)
|
||||
}
|
||||
b = strconv.AppendUint(buf[:0], uint64(exp), 10)
|
||||
for i := len(b); i < int(f.MinExponentDigits); i++ {
|
||||
dst = f.AppendDigit(dst, 0)
|
||||
}
|
||||
for _, c := range b {
|
||||
dst = f.AppendDigit(dst, c-'0')
|
||||
}
|
||||
}
|
||||
return appendAffix(dst, f, suffix, neg), savedLen, len(dst)
|
||||
}
|
||||
|
||||
// Superscript signs used when an exponent is written in superscript.
const (
	superPlus  = "\u207A" // SUPERSCRIPT PLUS SIGN
	superMinus = "\u207B" // SUPERSCRIPT MINUS
)

// superDigits maps a digit value 0-9 to its superscript character. The code
// points are not sequential, hence the lookup table.
var superDigits = []string{
	0: "\u2070", // SUPERSCRIPT DIGIT ZERO
	1: "\u00B9", // SUPERSCRIPT DIGIT ONE
	2: "\u00B2", // SUPERSCRIPT DIGIT TWO
	3: "\u00B3", // SUPERSCRIPT DIGIT THREE
	4: "\u2074", // SUPERSCRIPT DIGIT FOUR
	5: "\u2075", // SUPERSCRIPT DIGIT FIVE
	6: "\u2076", // SUPERSCRIPT DIGIT SIX
	7: "\u2077", // SUPERSCRIPT DIGIT SEVEN
	8: "\u2078", // SUPERSCRIPT DIGIT EIGHT
	9: "\u2079", // SUPERSCRIPT DIGIT NINE
}
|
||||
|
||||
func (f *Formatter) getAffixes(neg bool) (affix, suffix string) {
|
||||
str := f.Affix
|
||||
if str != "" {
|
||||
if f.NegOffset > 0 {
|
||||
if neg {
|
||||
str = str[f.NegOffset:]
|
||||
} else {
|
||||
str = str[:f.NegOffset]
|
||||
}
|
||||
}
|
||||
sufStart := 1 + str[0]
|
||||
affix = str[1:sufStart]
|
||||
suffix = str[sufStart+1:]
|
||||
}
|
||||
// TODO: introduce a NeedNeg sign to indicate if the left pattern already
|
||||
// has a sign marked?
|
||||
if f.NegOffset == 0 && (neg || f.Flags&AlwaysSign != 0) {
|
||||
affix = "-" + affix
|
||||
}
|
||||
return affix, suffix
|
||||
}
|
||||
|
||||
func (f *Formatter) renderSpecial(dst []byte, d *Digits) (b []byte, ok bool) {
|
||||
if d.NaN {
|
||||
return fmtNaN(dst, f), true
|
||||
}
|
||||
if d.Inf {
|
||||
return fmtInfinite(dst, f, d), true
|
||||
}
|
||||
return dst, false
|
||||
}
|
||||
|
||||
func fmtNaN(dst []byte, f *Formatter) []byte {
|
||||
return append(dst, f.Symbol(SymNan)...)
|
||||
}
|
||||
|
||||
func fmtInfinite(dst []byte, f *Formatter, d *Digits) []byte {
|
||||
affix, suffix := f.getAffixes(d.Neg)
|
||||
dst = appendAffix(dst, f, affix, d.Neg)
|
||||
dst = append(dst, f.Symbol(SymInfinity)...)
|
||||
dst = appendAffix(dst, f, suffix, d.Neg)
|
||||
return dst
|
||||
}
|
||||
|
||||
func appendAffix(dst []byte, f *Formatter, affix string, neg bool) []byte {
|
||||
quoting := false
|
||||
escaping := false
|
||||
for _, r := range affix {
|
||||
switch {
|
||||
case escaping:
|
||||
// escaping occurs both inside and outside of quotes
|
||||
dst = append(dst, string(r)...)
|
||||
escaping = false
|
||||
case r == '\\':
|
||||
escaping = true
|
||||
case r == '\'':
|
||||
quoting = !quoting
|
||||
case quoting:
|
||||
dst = append(dst, string(r)...)
|
||||
case r == '%':
|
||||
if f.DigitShift == 3 {
|
||||
dst = append(dst, f.Symbol(SymPerMille)...)
|
||||
} else {
|
||||
dst = append(dst, f.Symbol(SymPercentSign)...)
|
||||
}
|
||||
case r == '-' || r == '+':
|
||||
if neg {
|
||||
dst = append(dst, f.Symbol(SymMinusSign)...)
|
||||
} else if f.Flags&ElideSign == 0 {
|
||||
dst = append(dst, f.Symbol(SymPlusSign)...)
|
||||
} else {
|
||||
dst = append(dst, ' ')
|
||||
}
|
||||
default:
|
||||
dst = append(dst, string(r)...)
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
@ -0,0 +1,458 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"reflect"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/language"
|
||||
"golang.org/x/text/internal/language/compact"
|
||||
"golang.org/x/text/internal/number"
|
||||
"golang.org/x/text/internal/stringset"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
var (
|
||||
test = flag.Bool("test", false,
|
||||
"test existing tables; can be used to compare web data with package data.")
|
||||
outputFile = flag.String("output", "tables.go", "output file")
|
||||
outputTestFile = flag.String("testoutput", "data_test.go", "output file")
|
||||
|
||||
draft = flag.String("draft",
|
||||
"contributed",
|
||||
`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
|
||||
)
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
const pkg = "number"
|
||||
|
||||
gen.Repackage("gen_common.go", "common.go", pkg)
|
||||
// Read the CLDR zip file.
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
|
||||
d := &cldr.Decoder{}
|
||||
d.SetDirFilter("supplemental", "main")
|
||||
d.SetSectionFilter("numbers", "numberingSystem")
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatalf("DecodeZip: %v", err)
|
||||
}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile(*outputFile, pkg)
|
||||
|
||||
fmt.Fprintln(w, `import "golang.org/x/text/internal/stringset"`)
|
||||
|
||||
gen.WriteCLDRVersion(w)
|
||||
|
||||
genNumSystem(w, data)
|
||||
genSymbols(w, data)
|
||||
genFormats(w, data)
|
||||
}
|
||||
|
||||
var systemMap = map[string]system{"latn": 0}
|
||||
|
||||
func getNumberSystem(str string) system {
|
||||
ns, ok := systemMap[str]
|
||||
if !ok {
|
||||
log.Fatalf("No index for numbering system %q", str)
|
||||
}
|
||||
return ns
|
||||
}
|
||||
|
||||
func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) {
|
||||
numSysData := []systemData{
|
||||
{digitSize: 1, zero: [4]byte{'0'}},
|
||||
}
|
||||
|
||||
for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
|
||||
if len(ns.Digits) == 0 {
|
||||
continue
|
||||
}
|
||||
switch ns.Id {
|
||||
case "latn":
|
||||
// hard-wired
|
||||
continue
|
||||
case "hanidec":
|
||||
// non-consecutive digits: treat as "algorithmic"
|
||||
continue
|
||||
}
|
||||
|
||||
zero, sz := utf8.DecodeRuneInString(ns.Digits)
|
||||
if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte
|
||||
log.Fatalf("Last byte of zero value overflows for %s", ns.Id)
|
||||
}
|
||||
|
||||
i := rune(0)
|
||||
for _, r := range ns.Digits {
|
||||
// Verify that we can do simple math on the UTF-8 byte sequence
|
||||
// of zero to get the digit.
|
||||
if zero+i != r {
|
||||
// Runes not consecutive.
|
||||
log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r)
|
||||
}
|
||||
i++
|
||||
}
|
||||
var x [utf8.UTFMax]byte
|
||||
utf8.EncodeRune(x[:], zero)
|
||||
id := system(len(numSysData))
|
||||
systemMap[ns.Id] = id
|
||||
numSysData = append(numSysData, systemData{
|
||||
id: id,
|
||||
digitSize: byte(sz),
|
||||
zero: x,
|
||||
})
|
||||
}
|
||||
w.WriteVar("numSysData", numSysData)
|
||||
|
||||
algoID := system(len(numSysData))
|
||||
fmt.Fprintln(w, "const (")
|
||||
for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
|
||||
id, ok := systemMap[ns.Id]
|
||||
if !ok {
|
||||
id = algoID
|
||||
systemMap[ns.Id] = id
|
||||
algoID++
|
||||
}
|
||||
fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id)
|
||||
}
|
||||
fmt.Fprintln(w, "numNumberSystems")
|
||||
fmt.Fprintln(w, ")")
|
||||
|
||||
fmt.Fprintln(w, "var systemMap = map[string]system{")
|
||||
for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
|
||||
fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id))
|
||||
w.Size += len(ns.Id) + 16 + 1 // very coarse approximation
|
||||
}
|
||||
fmt.Fprintln(w, "}")
|
||||
}
|
||||
|
||||
func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
|
||||
d, err := cldr.ParseDraft(*draft)
|
||||
if err != nil {
|
||||
log.Fatalf("invalid draft level: %v", err)
|
||||
}
|
||||
|
||||
nNumberSystems := system(len(systemMap))
|
||||
|
||||
type symbols [NumSymbolTypes]string
|
||||
|
||||
type key struct {
|
||||
tag compact.ID
|
||||
system system
|
||||
}
|
||||
symbolMap := map[key]*symbols{}
|
||||
|
||||
defaults := map[compact.ID]system{}
|
||||
|
||||
for _, lang := range data.Locales() {
|
||||
ldml := data.RawLDML(lang)
|
||||
if ldml.Numbers == nil {
|
||||
continue
|
||||
}
|
||||
langIndex, ok := compact.FromTag(language.MustParse(lang))
|
||||
if !ok {
|
||||
log.Fatalf("No compact index for language %s", lang)
|
||||
}
|
||||
if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 {
|
||||
defaults[langIndex] = getNumberSystem(d[0].Data())
|
||||
}
|
||||
|
||||
syms := cldr.MakeSlice(&ldml.Numbers.Symbols)
|
||||
syms.SelectDraft(d)
|
||||
|
||||
getFirst := func(name string, x interface{}) string {
|
||||
v := reflect.ValueOf(x)
|
||||
slice := cldr.MakeSlice(x)
|
||||
slice.SelectAnyOf("alt", "", "alt")
|
||||
if reflect.Indirect(v).Len() == 0 {
|
||||
return ""
|
||||
} else if reflect.Indirect(v).Len() > 1 {
|
||||
log.Fatalf("%s: multiple values of %q within single symbol not supported.", lang, name)
|
||||
}
|
||||
return reflect.Indirect(v).Index(0).MethodByName("Data").Call(nil)[0].String()
|
||||
}
|
||||
|
||||
for _, sym := range ldml.Numbers.Symbols {
|
||||
if sym.NumberSystem == "" {
|
||||
// This is just linking the default of root to "latn".
|
||||
continue
|
||||
}
|
||||
symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
|
||||
SymDecimal: getFirst("decimal", &sym.Decimal),
|
||||
SymGroup: getFirst("group", &sym.Group),
|
||||
SymList: getFirst("list", &sym.List),
|
||||
SymPercentSign: getFirst("percentSign", &sym.PercentSign),
|
||||
SymPlusSign: getFirst("plusSign", &sym.PlusSign),
|
||||
SymMinusSign: getFirst("minusSign", &sym.MinusSign),
|
||||
SymExponential: getFirst("exponential", &sym.Exponential),
|
||||
SymSuperscriptingExponent: getFirst("superscriptingExponent", &sym.SuperscriptingExponent),
|
||||
SymPerMille: getFirst("perMille", &sym.PerMille),
|
||||
SymInfinity: getFirst("infinity", &sym.Infinity),
|
||||
SymNan: getFirst("nan", &sym.Nan),
|
||||
SymTimeSeparator: getFirst("timeSeparator", &sym.TimeSeparator),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Expand all values.
|
||||
for k, syms := range symbolMap {
|
||||
for t := SymDecimal; t < NumSymbolTypes; t++ {
|
||||
p := k.tag
|
||||
for syms[t] == "" {
|
||||
p = p.Parent()
|
||||
if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" {
|
||||
syms[t] = (*pSyms)[t]
|
||||
break
|
||||
}
|
||||
if p == 0 /* und */ {
|
||||
// Default to root, latn.
|
||||
syms[t] = (*symbolMap[key{}])[t]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unique the symbol sets and write the string data.
|
||||
m := map[symbols]int{}
|
||||
sb := stringset.NewBuilder()
|
||||
|
||||
symIndex := [][NumSymbolTypes]byte{}
|
||||
|
||||
for ns := system(0); ns < nNumberSystems; ns++ {
|
||||
for _, l := range data.Locales() {
|
||||
langIndex, _ := compact.FromTag(language.MustParse(l))
|
||||
s := symbolMap[key{langIndex, ns}]
|
||||
if s == nil {
|
||||
continue
|
||||
}
|
||||
if _, ok := m[*s]; !ok {
|
||||
m[*s] = len(symIndex)
|
||||
sb.Add(s[:]...)
|
||||
var x [NumSymbolTypes]byte
|
||||
for i := SymDecimal; i < NumSymbolTypes; i++ {
|
||||
x[i] = byte(sb.Index((*s)[i]))
|
||||
}
|
||||
symIndex = append(symIndex, x)
|
||||
}
|
||||
}
|
||||
}
|
||||
w.WriteVar("symIndex", symIndex)
|
||||
w.WriteVar("symData", sb.Set())
|
||||
|
||||
// resolveSymbolIndex gets the index from the closest matching locale,
|
||||
// including the locale itself.
|
||||
resolveSymbolIndex := func(langIndex compact.ID, ns system) symOffset {
|
||||
for {
|
||||
if sym := symbolMap[key{langIndex, ns}]; sym != nil {
|
||||
return symOffset(m[*sym])
|
||||
}
|
||||
if langIndex == 0 {
|
||||
return 0 // und, latn
|
||||
}
|
||||
langIndex = langIndex.Parent()
|
||||
}
|
||||
}
|
||||
|
||||
// Create an index with the symbols for each locale for the latn numbering
|
||||
// system. If this is not the default, or the only one, for a locale, we
|
||||
// will overwrite the value later.
|
||||
var langToDefaults [compact.NumCompactTags]symOffset
|
||||
for _, l := range data.Locales() {
|
||||
langIndex, _ := compact.FromTag(language.MustParse(l))
|
||||
langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0)
|
||||
}
|
||||
|
||||
// Delete redundant entries.
|
||||
for _, l := range data.Locales() {
|
||||
langIndex, _ := compact.FromTag(language.MustParse(l))
|
||||
def := defaults[langIndex]
|
||||
syms := symbolMap[key{langIndex, def}]
|
||||
if syms == nil {
|
||||
continue
|
||||
}
|
||||
for ns := system(0); ns < nNumberSystems; ns++ {
|
||||
if ns == def {
|
||||
continue
|
||||
}
|
||||
if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms {
|
||||
delete(symbolMap, key{langIndex, ns})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create a sorted list of alternatives per language. This will only need to
|
||||
// be referenced if a user specified an alternative numbering system.
|
||||
var langToAlt []altSymData
|
||||
for _, l := range data.Locales() {
|
||||
langIndex, _ := compact.FromTag(language.MustParse(l))
|
||||
start := len(langToAlt)
|
||||
if start >= hasNonLatnMask {
|
||||
log.Fatalf("Number of alternative assignments >= %x", hasNonLatnMask)
|
||||
}
|
||||
// Create the entry for the default value.
|
||||
def := defaults[langIndex]
|
||||
langToAlt = append(langToAlt, altSymData{
|
||||
compactTag: langIndex,
|
||||
system: def,
|
||||
symIndex: resolveSymbolIndex(langIndex, def),
|
||||
})
|
||||
|
||||
for ns := system(0); ns < nNumberSystems; ns++ {
|
||||
if def == ns {
|
||||
continue
|
||||
}
|
||||
if sym := symbolMap[key{langIndex, ns}]; sym != nil {
|
||||
langToAlt = append(langToAlt, altSymData{
|
||||
compactTag: langIndex,
|
||||
system: ns,
|
||||
symIndex: resolveSymbolIndex(langIndex, ns),
|
||||
})
|
||||
}
|
||||
}
|
||||
if def == 0 && len(langToAlt) == start+1 {
|
||||
// No additional data: erase the entry.
|
||||
langToAlt = langToAlt[:start]
|
||||
} else {
|
||||
// Overwrite the entry in langToDefaults.
|
||||
langToDefaults[langIndex] = hasNonLatnMask | symOffset(start)
|
||||
}
|
||||
}
|
||||
w.WriteComment(`
|
||||
langToDefaults maps a compact language index to the default numbering system
|
||||
and default symbol set`)
|
||||
w.WriteVar("langToDefaults", langToDefaults)
|
||||
|
||||
w.WriteComment(`
|
||||
langToAlt is a list of numbering system and symbol set pairs, sorted and
|
||||
marked by compact language index.`)
|
||||
w.WriteVar("langToAlt", langToAlt)
|
||||
}
|
||||
|
||||
// genFormats generates the lookup table for decimal, scientific and percent
|
||||
// patterns.
|
||||
//
|
||||
// CLDR allows for patterns to be different per language for different numbering
|
||||
// systems. In practice the patterns are set to be consistent for a language
|
||||
// independent of the numbering system. genFormats verifies that no language
|
||||
// deviates from this.
|
||||
func genFormats(w *gen.CodeWriter, data *cldr.CLDR) {
|
||||
d, err := cldr.ParseDraft(*draft)
|
||||
if err != nil {
|
||||
log.Fatalf("invalid draft level: %v", err)
|
||||
}
|
||||
|
||||
// Fill the first slot with a dummy so we can identify unspecified tags.
|
||||
formats := []number.Pattern{{}}
|
||||
patterns := map[string]int{}
|
||||
|
||||
// TODO: It would be possible to eliminate two of these slices by having
|
||||
// another indirection and store a reference to the combination of patterns.
|
||||
decimal := make([]byte, compact.NumCompactTags)
|
||||
scientific := make([]byte, compact.NumCompactTags)
|
||||
percent := make([]byte, compact.NumCompactTags)
|
||||
|
||||
for _, lang := range data.Locales() {
|
||||
ldml := data.RawLDML(lang)
|
||||
if ldml.Numbers == nil {
|
||||
continue
|
||||
}
|
||||
langIndex, ok := compact.FromTag(language.MustParse(lang))
|
||||
if !ok {
|
||||
log.Fatalf("No compact index for language %s", lang)
|
||||
}
|
||||
type patternSlice []*struct {
|
||||
cldr.Common
|
||||
Numbers string `xml:"numbers,attr"`
|
||||
Count string `xml:"count,attr"`
|
||||
}
|
||||
|
||||
add := func(name string, tags []byte, ps patternSlice) {
|
||||
sl := cldr.MakeSlice(&ps)
|
||||
sl.SelectDraft(d)
|
||||
if len(ps) == 0 {
|
||||
return
|
||||
}
|
||||
if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] {
|
||||
log.Fatalf("Inconsistent %d patterns for language %s", name, lang)
|
||||
}
|
||||
s := ps[0].Data()
|
||||
|
||||
index, ok := patterns[s]
|
||||
if !ok {
|
||||
nf, err := number.ParsePattern(s)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
index = len(formats)
|
||||
patterns[s] = index
|
||||
formats = append(formats, *nf)
|
||||
}
|
||||
tags[langIndex] = byte(index)
|
||||
}
|
||||
|
||||
for _, df := range ldml.Numbers.DecimalFormats {
|
||||
for _, l := range df.DecimalFormatLength {
|
||||
if l.Type != "" {
|
||||
continue
|
||||
}
|
||||
for _, f := range l.DecimalFormat {
|
||||
add("decimal", decimal, f.Pattern)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, df := range ldml.Numbers.ScientificFormats {
|
||||
for _, l := range df.ScientificFormatLength {
|
||||
if l.Type != "" {
|
||||
continue
|
||||
}
|
||||
for _, f := range l.ScientificFormat {
|
||||
add("scientific", scientific, f.Pattern)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, df := range ldml.Numbers.PercentFormats {
|
||||
for _, l := range df.PercentFormatLength {
|
||||
if l.Type != "" {
|
||||
continue
|
||||
}
|
||||
for _, f := range l.PercentFormat {
|
||||
add("percent", percent, f.Pattern)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Complete the parent tag array to reflect inheritance. An index of 0
|
||||
// indicates an unspecified value.
|
||||
for _, data := range [][]byte{decimal, scientific, percent} {
|
||||
for i := range data {
|
||||
p := compact.ID(i)
|
||||
for ; data[p] == 0; p = p.Parent() {
|
||||
}
|
||||
data[i] = data[p]
|
||||
}
|
||||
}
|
||||
w.WriteVar("tagToDecimal", decimal)
|
||||
w.WriteVar("tagToScientific", scientific)
|
||||
w.WriteVar("tagToPercent", percent)
|
||||
|
||||
value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1)
|
||||
// Break up the lines. This won't give ideal perfect formatting, but it is
|
||||
// better than one huge line.
|
||||
value = strings.Replace(value, ", ", ",\n", -1)
|
||||
fmt.Fprintf(w, "var formats = %s\n", value)
|
||||
}
|
@ -0,0 +1,59 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal/language/compact"
|
||||
)
|
||||
|
||||
type (
	// A system identifies a CLDR numbering system.
	system byte

	// systemData bundles a numbering system identifier with the UTF-8
	// encoding details of its digits.
	systemData struct {
		id        system
		digitSize byte              // number of UTF-8 bytes per digit
		zero      [utf8.UTFMax]byte // UTF-8 sequence of zero digit.
	}
)
|
||||
|
||||
// A SymbolType identifies a symbol of a specific kind.
type SymbolType int

// The symbol kinds, numbered consecutively from zero.
const (
	SymDecimal SymbolType = iota
	SymGroup
	SymList
	SymPercentSign
	SymPlusSign
	SymMinusSign
	SymExponential
	SymSuperscriptingExponent
	SymPerMille
	SymInfinity
	SymNan
	SymTimeSeparator

	// NumSymbolTypes is the number of symbol kinds declared above.
	NumSymbolTypes
)
|
||||
|
||||
// hasNonLatnMask is the bit of a symOffset that marks the value as an offset
// into altSymData instead of symIndex.
const hasNonLatnMask = 1 << 15

// symOffset is an offset into altSymData if the bit indicated by hasNonLatnMask
// is not 0 (with this bit masked out), and an offset into symIndex otherwise.
//
// TODO: this type can be a byte again if we use an indirection into altsymData
// and introduce an alt -> offset slice (the length of this will be number of
// alternatives plus 1). This also allows getting rid of the compactTag field
// in altSymData. In total this will save about 1K.
type symOffset uint16
|
||||
|
||||
type altSymData struct {
|
||||
compactTag compact.ID
|
||||
symIndex symOffset
|
||||
system system
|
||||
}
|
@ -0,0 +1,152 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_common.go
|
||||
|
||||
// Package number contains tools and data for formatting numbers.
|
||||
package number
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal/language/compact"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// Info holds number formatting configuration data.
|
||||
type Info struct {
|
||||
system systemData // numbering system information
|
||||
symIndex symOffset // index to symbols
|
||||
}
|
||||
|
||||
// InfoFromLangID returns a Info for the given compact language identifier and
|
||||
// numbering system identifier. If system is the empty string, the default
|
||||
// numbering system will be taken for that language.
|
||||
func InfoFromLangID(compactIndex compact.ID, numberSystem string) Info {
|
||||
p := langToDefaults[compactIndex]
|
||||
// Lookup the entry for the language.
|
||||
pSymIndex := symOffset(0) // Default: Latin, default symbols
|
||||
system, ok := systemMap[numberSystem]
|
||||
if !ok {
|
||||
// Take the value for the default numbering system. This is by far the
|
||||
// most common case as an alternative numbering system is hardly used.
|
||||
if p&hasNonLatnMask == 0 { // Latn digits.
|
||||
pSymIndex = p
|
||||
} else { // Non-Latn or multiple numbering systems.
|
||||
// Take the first entry from the alternatives list.
|
||||
data := langToAlt[p&^hasNonLatnMask]
|
||||
pSymIndex = data.symIndex
|
||||
system = data.system
|
||||
}
|
||||
} else {
|
||||
langIndex := compactIndex
|
||||
ns := system
|
||||
outerLoop:
|
||||
for ; ; p = langToDefaults[langIndex] {
|
||||
if p&hasNonLatnMask == 0 {
|
||||
if ns == 0 {
|
||||
// The index directly points to the symbol data.
|
||||
pSymIndex = p
|
||||
break
|
||||
}
|
||||
// Move to the parent and retry.
|
||||
langIndex = langIndex.Parent()
|
||||
} else {
|
||||
// The index points to a list of symbol data indexes.
|
||||
for _, e := range langToAlt[p&^hasNonLatnMask:] {
|
||||
if e.compactTag != langIndex {
|
||||
if langIndex == 0 {
|
||||
// The CLDR root defines full symbol information for
|
||||
// all numbering systems (even though mostly by
|
||||
// means of aliases). Fall back to the default entry
|
||||
// for Latn if there is no data for the numbering
|
||||
// system of this language.
|
||||
if ns == 0 {
|
||||
break
|
||||
}
|
||||
// Fall back to Latin and start from the original
|
||||
// language. See
|
||||
// https://unicode.org/reports/tr35/#Locale_Inheritance.
|
||||
ns = numLatn
|
||||
langIndex = compactIndex
|
||||
continue outerLoop
|
||||
}
|
||||
// Fall back to parent.
|
||||
langIndex = langIndex.Parent()
|
||||
} else if e.system == ns {
|
||||
pSymIndex = e.symIndex
|
||||
break outerLoop
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if int(system) >= len(numSysData) { // algorithmic
|
||||
// Will generate ASCII digits in case the user inadvertently calls
|
||||
// WriteDigit or Digit on it.
|
||||
d := numSysData[0]
|
||||
d.id = system
|
||||
return Info{
|
||||
system: d,
|
||||
symIndex: pSymIndex,
|
||||
}
|
||||
}
|
||||
return Info{
|
||||
system: numSysData[system],
|
||||
symIndex: pSymIndex,
|
||||
}
|
||||
}
|
||||
|
||||
// InfoFromTag returns a Info for the given language tag.
|
||||
func InfoFromTag(t language.Tag) Info {
|
||||
return InfoFromLangID(tagToID(t), t.TypeForKey("nu"))
|
||||
}
|
||||
|
||||
// IsDecimal reports if the numbering system can convert decimal to native
|
||||
// symbols one-to-one.
|
||||
func (n Info) IsDecimal() bool {
|
||||
return int(n.system.id) < len(numSysData)
|
||||
}
|
||||
|
||||
// WriteDigit writes the UTF-8 sequence for n corresponding to the given ASCII
|
||||
// digit to dst and reports the number of bytes written. dst must be large
|
||||
// enough to hold the rune (can be up to utf8.UTFMax bytes).
|
||||
func (n Info) WriteDigit(dst []byte, asciiDigit rune) int {
|
||||
copy(dst, n.system.zero[:n.system.digitSize])
|
||||
dst[n.system.digitSize-1] += byte(asciiDigit - '0')
|
||||
return int(n.system.digitSize)
|
||||
}
|
||||
|
||||
// AppendDigit appends the UTF-8 sequence for n corresponding to the given digit
|
||||
// to dst and reports the number of bytes written. dst must be large enough to
|
||||
// hold the rune (can be up to utf8.UTFMax bytes).
|
||||
func (n Info) AppendDigit(dst []byte, digit byte) []byte {
|
||||
dst = append(dst, n.system.zero[:n.system.digitSize]...)
|
||||
dst[len(dst)-1] += digit
|
||||
return dst
|
||||
}
|
||||
|
||||
// Digit returns the digit for the numbering system for the corresponding ASCII
|
||||
// value. For example, ni.Digit('3') could return '三'. Note that the argument
|
||||
// is the rune constant '3', which equals 51, not the integer constant 3.
|
||||
func (n Info) Digit(asciiDigit rune) rune {
|
||||
var x [utf8.UTFMax]byte
|
||||
n.WriteDigit(x[:], asciiDigit)
|
||||
r, _ := utf8.DecodeRune(x[:])
|
||||
return r
|
||||
}
|
||||
|
||||
// Symbol returns the string for the given symbol type.
|
||||
func (n Info) Symbol(t SymbolType) string {
|
||||
return symData.Elem(int(symIndex[n.symIndex][t]))
|
||||
}
|
||||
|
||||
func formatForLang(t language.Tag, index []byte) *Pattern {
|
||||
return &formats[index[tagToID(t)]]
|
||||
}
|
||||
|
||||
func tagToID(t language.Tag) compact.ID {
|
||||
id, _ := compact.RegionalID(compact.Tag(t))
|
||||
return id
|
||||
}
|
@ -0,0 +1,485 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package number
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// This file contains a parser for the CLDR number patterns as described in
|
||||
// https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
|
||||
//
|
||||
// The following BNF is derived from this standard.
|
||||
//
|
||||
// pattern := subpattern (';' subpattern)?
|
||||
// subpattern := affix? number exponent? affix?
|
||||
// number := decimal | sigDigits
|
||||
// decimal := '#'* '0'* ('.' fraction)? | '#' | '0'
|
||||
// fraction := '0'* '#'*
|
||||
// sigDigits := '#'* '@' '@'* '#'*
|
||||
// exponent := 'E' '+'? '0'* '0'
|
||||
// padSpec := '*' \L
|
||||
//
|
||||
// Notes:
|
||||
// - An affix pattern may contain any runes, but runes with special meaning
|
||||
// should be escaped.
|
||||
// - Sequences of digits, '#', and '@' in decimal and sigDigits may have
|
||||
// interstitial commas.
|
||||
|
||||
// TODO: replace special characters in affixes (-, +, ¤) with control codes.
|
||||
|
||||
// Pattern holds information for formatting numbers. It is designed to hold
|
||||
// information from CLDR number patterns.
|
||||
//
|
||||
// This pattern is precompiled for all patterns for all languages. Even though
|
||||
// the number of patterns is not very large, we want to keep this small.
|
||||
//
|
||||
// This type is only intended for internal use.
|
||||
type Pattern struct {
|
||||
RoundingContext
|
||||
|
||||
Affix string // includes prefix and suffix. First byte is prefix length.
|
||||
Offset uint16 // Offset into Affix for prefix and suffix
|
||||
NegOffset uint16 // Offset into Affix for negative prefix and suffix or 0.
|
||||
PadRune rune
|
||||
FormatWidth uint16
|
||||
|
||||
GroupingSize [2]uint8
|
||||
Flags PatternFlag
|
||||
}
|
||||
|
||||
// A RoundingContext indicates how a number should be converted to digits.
|
||||
// It contains all information needed to determine the "visible digits" as
|
||||
// required by the pluralization rules.
|
||||
type RoundingContext struct {
|
||||
// TODO: unify these two fields so that there is a more unambiguous meaning
|
||||
// of how precision is handled.
|
||||
MaxSignificantDigits int16 // -1 is unlimited
|
||||
MaxFractionDigits int16 // -1 is unlimited
|
||||
|
||||
Increment uint32
|
||||
IncrementScale uint8 // May differ from printed scale.
|
||||
|
||||
Mode RoundingMode
|
||||
|
||||
DigitShift uint8 // Number of decimals to shift. Used for % and ‰.
|
||||
|
||||
// Number of digits.
|
||||
MinIntegerDigits uint8
|
||||
|
||||
MaxIntegerDigits uint8
|
||||
MinFractionDigits uint8
|
||||
MinSignificantDigits uint8
|
||||
|
||||
MinExponentDigits uint8
|
||||
}
|
||||
|
||||
// RoundSignificantDigits returns the number of significant digits an
|
||||
// implementation of Convert may round to or n < 0 if there is no maximum or
|
||||
// a maximum is not recommended.
|
||||
func (r *RoundingContext) RoundSignificantDigits() (n int) {
|
||||
if r.MaxFractionDigits == 0 && r.MaxSignificantDigits > 0 {
|
||||
return int(r.MaxSignificantDigits)
|
||||
} else if r.isScientific() && r.MaxIntegerDigits == 1 {
|
||||
if r.MaxSignificantDigits == 0 ||
|
||||
int(r.MaxFractionDigits+1) == int(r.MaxSignificantDigits) {
|
||||
// Note: don't add DigitShift: it is only used for decimals.
|
||||
return int(r.MaxFractionDigits) + 1
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// RoundFractionDigits returns the number of fraction digits an implementation
|
||||
// of Convert may round to or n < 0 if there is no maximum or a maximum is not
|
||||
// recommended.
|
||||
func (r *RoundingContext) RoundFractionDigits() (n int) {
|
||||
if r.MinExponentDigits == 0 &&
|
||||
r.MaxSignificantDigits == 0 &&
|
||||
r.MaxFractionDigits >= 0 {
|
||||
return int(r.MaxFractionDigits) + int(r.DigitShift)
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// SetScale fixes the RoundingContext to a fixed number of fraction digits.
|
||||
func (r *RoundingContext) SetScale(scale int) {
|
||||
r.MinFractionDigits = uint8(scale)
|
||||
r.MaxFractionDigits = int16(scale)
|
||||
}
|
||||
|
||||
func (r *RoundingContext) SetPrecision(prec int) {
|
||||
r.MaxSignificantDigits = int16(prec)
|
||||
}
|
||||
|
||||
func (r *RoundingContext) isScientific() bool {
|
||||
return r.MinExponentDigits > 0
|
||||
}
|
||||
|
||||
func (f *Pattern) needsSep(pos int) bool {
|
||||
p := pos - 1
|
||||
size := int(f.GroupingSize[0])
|
||||
if size == 0 || p == 0 {
|
||||
return false
|
||||
}
|
||||
if p == size {
|
||||
return true
|
||||
}
|
||||
if p -= size; p < 0 {
|
||||
return false
|
||||
}
|
||||
// TODO: make second groupingsize the same as first if 0 so that we can
|
||||
// avoid this check.
|
||||
if x := int(f.GroupingSize[1]); x != 0 {
|
||||
size = x
|
||||
}
|
||||
return p%size == 0
|
||||
}
|
||||
|
||||
// A PatternFlag is a bit mask for the flag field of a Pattern.
type PatternFlag uint8

const (
	AlwaysSign PatternFlag = 1 << iota
	// ElideSign uses a space instead of a plus sign. AlwaysSign must be true.
	ElideSign
	// AlwaysExpSign is set when the exponent of a pattern starts with '+'.
	AlwaysExpSign
	AlwaysDecimalSeparator
	// ParenthesisForNegative is a common pattern; a flag saves space.
	ParenthesisForNegative

	PadAfterNumber
	PadAfterAffix

	// The four padding positions are encoded as combinations of the two
	// padding bits above.
	PadBeforePrefix = 0 // Default
	PadAfterPrefix  = PadAfterAffix
	PadBeforeSuffix = PadAfterNumber
	PadAfterSuffix  = PadAfterNumber | PadAfterAffix
	PadMask         = PadAfterNumber | PadAfterAffix
)
|
||||
|
||||
type parser struct {
|
||||
*Pattern
|
||||
|
||||
leadingSharps int
|
||||
|
||||
pos int
|
||||
err error
|
||||
doNotTerminate bool
|
||||
groupingCount uint
|
||||
hasGroup bool
|
||||
buf []byte
|
||||
}
|
||||
|
||||
func (p *parser) setError(err error) {
|
||||
if p.err == nil {
|
||||
p.err = err
|
||||
}
|
||||
}
|
||||
|
||||
func (p *parser) updateGrouping() {
|
||||
if p.hasGroup &&
|
||||
0 < p.groupingCount && p.groupingCount < 255 {
|
||||
p.GroupingSize[1] = p.GroupingSize[0]
|
||||
p.GroupingSize[0] = uint8(p.groupingCount)
|
||||
}
|
||||
p.groupingCount = 0
|
||||
p.hasGroup = true
|
||||
}
|
||||
|
||||
// Errors reported while parsing a pattern.
//
// TODO: more sensible and localizeable error messages.
var (
	// errMultiplePadSpecifiers: a second '*' pad specifier was found.
	errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers")
	errInvalidPadSpecifier   = errors.New("format: invalid pad specifier")
	errInvalidQuote          = errors.New("format: invalid quote")
	// errAffixTooLarge: an affix does not fit its one-byte length prefix.
	errAffixTooLarge = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 256 bytes")
	// errDuplicatePercentSign and errDuplicatePermilleSign: a digit shift was
	// already set when '%' or '‰' was encountered.
	errDuplicatePercentSign  = errors.New("format: duplicate percent sign")
	errDuplicatePermilleSign = errors.New("format: duplicate permille sign")
	// errUnexpectedEnd: the pattern ended where more input was required.
	errUnexpectedEnd = errors.New("format: unexpected end of pattern")
)
|
||||
|
||||
// ParsePattern extracts formatting information from a CLDR number pattern.
|
||||
//
|
||||
// See https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
|
||||
func ParsePattern(s string) (f *Pattern, err error) {
|
||||
p := parser{Pattern: &Pattern{}}
|
||||
|
||||
s = p.parseSubPattern(s)
|
||||
|
||||
if s != "" {
|
||||
// Parse negative sub pattern.
|
||||
if s[0] != ';' {
|
||||
p.setError(errors.New("format: error parsing first sub pattern"))
|
||||
return nil, p.err
|
||||
}
|
||||
neg := parser{Pattern: &Pattern{}} // just for extracting the affixes.
|
||||
s = neg.parseSubPattern(s[len(";"):])
|
||||
p.NegOffset = uint16(len(p.buf))
|
||||
p.buf = append(p.buf, neg.buf...)
|
||||
}
|
||||
if s != "" {
|
||||
p.setError(errors.New("format: spurious characters at end of pattern"))
|
||||
}
|
||||
if p.err != nil {
|
||||
return nil, p.err
|
||||
}
|
||||
if affix := string(p.buf); affix == "\x00\x00" || affix == "\x00\x00\x00\x00" {
|
||||
// No prefix or suffixes.
|
||||
p.NegOffset = 0
|
||||
} else {
|
||||
p.Affix = affix
|
||||
}
|
||||
if p.Increment == 0 {
|
||||
p.IncrementScale = 0
|
||||
}
|
||||
return p.Pattern, nil
|
||||
}
|
||||
|
||||
func (p *parser) parseSubPattern(s string) string {
|
||||
s = p.parsePad(s, PadBeforePrefix)
|
||||
s = p.parseAffix(s)
|
||||
s = p.parsePad(s, PadAfterPrefix)
|
||||
|
||||
s = p.parse(p.number, s)
|
||||
p.updateGrouping()
|
||||
|
||||
s = p.parsePad(s, PadBeforeSuffix)
|
||||
s = p.parseAffix(s)
|
||||
s = p.parsePad(s, PadAfterSuffix)
|
||||
return s
|
||||
}
|
||||
|
||||
func (p *parser) parsePad(s string, f PatternFlag) (tail string) {
|
||||
if len(s) >= 2 && s[0] == '*' {
|
||||
r, sz := utf8.DecodeRuneInString(s[1:])
|
||||
if p.PadRune != 0 {
|
||||
p.err = errMultiplePadSpecifiers
|
||||
} else {
|
||||
p.Flags |= f
|
||||
p.PadRune = r
|
||||
}
|
||||
return s[1+sz:]
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func (p *parser) parseAffix(s string) string {
|
||||
x := len(p.buf)
|
||||
p.buf = append(p.buf, 0) // placeholder for affix length
|
||||
|
||||
s = p.parse(p.affix, s)
|
||||
|
||||
n := len(p.buf) - x - 1
|
||||
if n > 0xFF {
|
||||
p.setError(errAffixTooLarge)
|
||||
}
|
||||
p.buf[x] = uint8(n)
|
||||
return s
|
||||
}
|
||||
|
||||
// state is a state transition function: it consumes one rune and returns the
// state to apply to the next rune. A nil result ends parsing. A state function
// may set an error on the parser or may simply return on an incorrect token
// and let the next phase fail.
type state func(rune) state
|
||||
|
||||
// parse repeatedly applies a state function on the given string until a
|
||||
// termination condition is reached.
|
||||
func (p *parser) parse(fn state, s string) (tail string) {
|
||||
for i, r := range s {
|
||||
p.doNotTerminate = false
|
||||
if fn = fn(r); fn == nil || p.err != nil {
|
||||
return s[i:]
|
||||
}
|
||||
p.FormatWidth++
|
||||
}
|
||||
if p.doNotTerminate {
|
||||
p.setError(errUnexpectedEnd)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (p *parser) affix(r rune) state {
|
||||
switch r {
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
|
||||
'#', '@', '.', '*', ',', ';':
|
||||
return nil
|
||||
case '\'':
|
||||
p.FormatWidth--
|
||||
return p.escapeFirst
|
||||
case '%':
|
||||
if p.DigitShift != 0 {
|
||||
p.setError(errDuplicatePercentSign)
|
||||
}
|
||||
p.DigitShift = 2
|
||||
case '\u2030': // ‰ Per mille
|
||||
if p.DigitShift != 0 {
|
||||
p.setError(errDuplicatePermilleSign)
|
||||
}
|
||||
p.DigitShift = 3
|
||||
// TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤
|
||||
}
|
||||
p.buf = append(p.buf, string(r)...)
|
||||
return p.affix
|
||||
}
|
||||
|
||||
func (p *parser) escapeFirst(r rune) state {
|
||||
switch r {
|
||||
case '\'':
|
||||
p.buf = append(p.buf, "\\'"...)
|
||||
return p.affix
|
||||
default:
|
||||
p.buf = append(p.buf, '\'')
|
||||
p.buf = append(p.buf, string(r)...)
|
||||
}
|
||||
return p.escape
|
||||
}
|
||||
|
||||
func (p *parser) escape(r rune) state {
|
||||
switch r {
|
||||
case '\'':
|
||||
p.FormatWidth--
|
||||
p.buf = append(p.buf, '\'')
|
||||
return p.affix
|
||||
default:
|
||||
p.buf = append(p.buf, string(r)...)
|
||||
}
|
||||
return p.escape
|
||||
}
|
||||
|
||||
// number parses a number. The BNF says the integer part should always have
|
||||
// a '0', but that does not appear to be the case according to the rest of the
|
||||
// documentation. We will allow having only '#' numbers.
|
||||
func (p *parser) number(r rune) state {
|
||||
switch r {
|
||||
case '#':
|
||||
p.groupingCount++
|
||||
p.leadingSharps++
|
||||
case '@':
|
||||
p.groupingCount++
|
||||
p.leadingSharps = 0
|
||||
p.MaxFractionDigits = -1
|
||||
return p.sigDigits(r)
|
||||
case ',':
|
||||
if p.leadingSharps == 0 { // no leading commas
|
||||
return nil
|
||||
}
|
||||
p.updateGrouping()
|
||||
case 'E':
|
||||
p.MaxIntegerDigits = uint8(p.leadingSharps)
|
||||
return p.exponent
|
||||
case '.': // allow ".##" etc.
|
||||
p.updateGrouping()
|
||||
return p.fraction
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||||
return p.integer(r)
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
return p.number
|
||||
}
|
||||
|
||||
func (p *parser) integer(r rune) state {
|
||||
if !('0' <= r && r <= '9') {
|
||||
var next state
|
||||
switch r {
|
||||
case 'E':
|
||||
if p.leadingSharps > 0 {
|
||||
p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
|
||||
}
|
||||
next = p.exponent
|
||||
case '.':
|
||||
next = p.fraction
|
||||
case ',':
|
||||
next = p.integer
|
||||
}
|
||||
p.updateGrouping()
|
||||
return next
|
||||
}
|
||||
p.Increment = p.Increment*10 + uint32(r-'0')
|
||||
p.groupingCount++
|
||||
p.MinIntegerDigits++
|
||||
return p.integer
|
||||
}
|
||||
|
||||
func (p *parser) sigDigits(r rune) state {
|
||||
switch r {
|
||||
case '@':
|
||||
p.groupingCount++
|
||||
p.MaxSignificantDigits++
|
||||
p.MinSignificantDigits++
|
||||
case '#':
|
||||
return p.sigDigitsFinal(r)
|
||||
case 'E':
|
||||
p.updateGrouping()
|
||||
return p.normalizeSigDigitsWithExponent()
|
||||
default:
|
||||
p.updateGrouping()
|
||||
return nil
|
||||
}
|
||||
return p.sigDigits
|
||||
}
|
||||
|
||||
func (p *parser) sigDigitsFinal(r rune) state {
|
||||
switch r {
|
||||
case '#':
|
||||
p.groupingCount++
|
||||
p.MaxSignificantDigits++
|
||||
case 'E':
|
||||
p.updateGrouping()
|
||||
return p.normalizeSigDigitsWithExponent()
|
||||
default:
|
||||
p.updateGrouping()
|
||||
return nil
|
||||
}
|
||||
return p.sigDigitsFinal
|
||||
}
|
||||
|
||||
func (p *parser) normalizeSigDigitsWithExponent() state {
|
||||
p.MinIntegerDigits, p.MaxIntegerDigits = 1, 1
|
||||
p.MinFractionDigits = p.MinSignificantDigits - 1
|
||||
p.MaxFractionDigits = p.MaxSignificantDigits - 1
|
||||
p.MinSignificantDigits, p.MaxSignificantDigits = 0, 0
|
||||
return p.exponent
|
||||
}
|
||||
|
||||
func (p *parser) fraction(r rune) state {
|
||||
switch r {
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||||
p.Increment = p.Increment*10 + uint32(r-'0')
|
||||
p.IncrementScale++
|
||||
p.MinFractionDigits++
|
||||
p.MaxFractionDigits++
|
||||
case '#':
|
||||
p.MaxFractionDigits++
|
||||
case 'E':
|
||||
if p.leadingSharps > 0 {
|
||||
p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
|
||||
}
|
||||
return p.exponent
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
return p.fraction
|
||||
}
|
||||
|
||||
func (p *parser) exponent(r rune) state {
|
||||
switch r {
|
||||
case '+':
|
||||
// Set mode and check it wasn't already set.
|
||||
if p.Flags&AlwaysExpSign != 0 || p.MinExponentDigits > 0 {
|
||||
break
|
||||
}
|
||||
p.Flags |= AlwaysExpSign
|
||||
p.doNotTerminate = true
|
||||
return p.exponent
|
||||
case '0':
|
||||
p.MinExponentDigits++
|
||||
return p.exponent
|
||||
}
|
||||
// termination condition
|
||||
if p.MinExponentDigits == 0 {
|
||||
p.setError(errors.New("format: need at least one digit"))
|
||||
}
|
||||
return nil
|
||||
}
|
@ -0,0 +1,16 @@
|
||||
// Code generated by "stringer -type RoundingMode"; DO NOT EDIT.
|
||||
|
||||
package number
|
||||
|
||||
import "strconv"
|
||||
|
||||
const _RoundingMode_name = "ToNearestEvenToNearestZeroToNearestAwayToPositiveInfToNegativeInfToZeroAwayFromZeronumModes"
|
||||
|
||||
var _RoundingMode_index = [...]uint8{0, 13, 26, 39, 52, 65, 71, 83, 91}
|
||||
|
||||
func (i RoundingMode) String() string {
|
||||
if i >= RoundingMode(len(_RoundingMode_index)-1) {
|
||||
return "RoundingMode(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _RoundingMode_name[_RoundingMode_index[i]:_RoundingMode_index[i+1]]
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,86 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package stringset provides a way to represent a collection of strings
|
||||
// compactly.
|
||||
package stringset
|
||||
|
||||
import "sort"
|
||||
|
||||
// A Set holds a collection of strings that can be looked up by an index number.
type Set struct {
	// These fields are exported to allow for code generation.

	// Data is the concatenation of all strings of the set.
	Data string
	// Index holds the offset into Data at which each string starts, followed
	// by one final entry marking the end of the last string.
	Index []uint16
}

// Elem returns the string with index i. It panics if i is out of range.
func (s *Set) Elem(i int) string {
	start, end := s.Index[i], s.Index[i+1]
	return s.Data[start:end]
}

// Len returns the number of strings in the set.
func (s *Set) Len() int {
	return len(s.Index) - 1
}

// Search returns the index of the given string or -1 if it is not in the set.
// The Set must have been created with strings in sorted order.
func Search(s *Set, str string) int {
	// TODO: optimize this if it gets used a lot.
	n := s.Len()
	// Binary search for the first element that is not smaller than str.
	pos := sort.Search(n, func(i int) bool {
		return s.Elem(i) >= str
	})
	if pos < n && s.Elem(pos) == str {
		return pos
	}
	return -1
}
|
||||
|
||||
// A Builder constructs Sets.
|
||||
type Builder struct {
|
||||
set Set
|
||||
index map[string]int
|
||||
}
|
||||
|
||||
// NewBuilder returns a new and initialized Builder.
|
||||
func NewBuilder() *Builder {
|
||||
return &Builder{
|
||||
set: Set{
|
||||
Index: []uint16{0},
|
||||
},
|
||||
index: map[string]int{},
|
||||
}
|
||||
}
|
||||
|
||||
// Set creates the set created so far.
|
||||
func (b *Builder) Set() Set {
|
||||
return b.set
|
||||
}
|
||||
|
||||
// Index returns the index for the given string, which must have been added
|
||||
// before.
|
||||
func (b *Builder) Index(s string) int {
|
||||
return b.index[s]
|
||||
}
|
||||
|
||||
// Add adds a string to the index. Strings that are added by a single Add will
|
||||
// be stored together, unless they match an existing string.
|
||||
func (b *Builder) Add(ss ...string) {
|
||||
// First check if the string already exists.
|
||||
for _, s := range ss {
|
||||
if _, ok := b.index[s]; ok {
|
||||
continue
|
||||
}
|
||||
b.index[s] = len(b.set.Index) - 1
|
||||
b.set.Data += s
|
||||
x := len(b.set.Data)
|
||||
if x > 0xFFFF {
|
||||
panic("Index too > 0xFFFF")
|
||||
}
|
||||
b.set.Index = append(b.set.Index, uint16(x))
|
||||
}
|
||||
}
|
@ -0,0 +1,100 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package tag contains functionality handling tags and related data.
|
||||
package tag // import "golang.org/x/text/internal/tag"
|
||||
|
||||
import "sort"
|
||||
|
||||
// An Index converts tags to a compact numeric value.
//
// All elements are of size 4. Tags may be up to 4 bytes long. Excess bytes can
// be used to store additional information about the tag.
type Index string

// Elem returns the element data at the given index.
func (s Index) Elem(x int) string {
	return string(s[x*4 : x*4+4])
}

// Index reports the index of the given key or -1 if it could not be found.
// Only the first len(key) bytes from the start of the 4-byte entries will be
// considered for the search and the first match in Index will be returned.
// The entries must be sorted, as a binary search is used.
func (s Index) Index(key []byte) int {
	n := len(key)
	entries := len(s) / 4
	// search the index of the first entry with an equal or higher value than
	// key in s.
	index := sort.Search(entries, func(i int) bool {
		return cmp(s[i*4:i*4+n], key) != -1
	})
	// sort.Search returns entries if every entry is smaller than key. There
	// is nothing to compare against in that case, and slicing s at that
	// position would be out of range.
	if index == entries {
		return -1
	}
	i := index * 4
	if cmp(s[i:i+n], key) != 0 {
		return -1
	}
	return index
}

// Next finds the next occurrence of key after index x, which must have been
// obtained from a call to Index using the same key. It returns x+1 or -1.
func (s Index) Next(key []byte, x int) int {
	if x++; x*4 < len(s) && cmp(s[x*4:x*4+len(key)], key) == 0 {
		return x
	}
	return -1
}

// cmp returns an integer comparing a and b lexicographically: -1 if a < b,
// 1 if a > b and 0 if they are equal. A proper prefix sorts before the longer
// value.
func cmp(a Index, b []byte) int {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	for i, c := range b[:n] {
		switch {
		case a[i] > c:
			return 1
		case a[i] < c:
			return -1
		}
	}
	switch {
	case len(a) < len(b):
		return -1
	case len(a) > len(b):
		return 1
	}
	return 0
}

// Compare returns an integer comparing a and b lexicographically.
func Compare(a string, b []byte) int {
	return cmp(Index(a), b)
}
|
||||
|
||||
// FixCase reformats b to the same pattern of cases as form.
// It returns false if string b is malformed, that is, if the lengths differ
// or b contains a byte that is not an ASCII letter. Bytes preceding the first
// malformed byte have already been rewritten in b when false is returned.
func FixCase(form string, b []byte) bool {
	if len(b) != len(form) {
		return false
	}
	// Distance between the upper- and lower-case ASCII letter ranges.
	const caseDelta = 'z' - 'Z'
	for i := range b {
		ch := b[i]
		if wantUpper := form[i] <= 'Z'; wantUpper {
			if ch >= 'a' {
				ch -= caseDelta
			}
			if ch < 'A' || ch > 'Z' {
				return false
			}
		} else {
			if ch <= 'Z' {
				ch += caseDelta
			}
			if ch < 'a' || ch > 'z' {
				return false
			}
		}
		b[i] = ch
	}
	return true
}
|
@ -0,0 +1,187 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// The Coverage interface is used to define the level of coverage of an
|
||||
// internationalization service. Note that not all types are supported by all
|
||||
// services. As lists may be generated on the fly, it is recommended that users
|
||||
// of a Coverage cache the results.
|
||||
type Coverage interface {
|
||||
// Tags returns the list of supported tags.
|
||||
Tags() []Tag
|
||||
|
||||
// BaseLanguages returns the list of supported base languages.
|
||||
BaseLanguages() []Base
|
||||
|
||||
// Scripts returns the list of supported scripts.
|
||||
Scripts() []Script
|
||||
|
||||
// Regions returns the list of supported regions.
|
||||
Regions() []Region
|
||||
}
|
||||
|
||||
var (
|
||||
// Supported defines a Coverage that lists all supported subtags. Tags
|
||||
// always returns nil.
|
||||
Supported Coverage = allSubtags{}
|
||||
)
|
||||
|
||||
// TODO:
|
||||
// - Support Variants, numbering systems.
|
||||
// - CLDR coverage levels.
|
||||
// - Set of common tags defined in this package.
|
||||
|
||||
type allSubtags struct{}
|
||||
|
||||
// Regions returns the list of supported regions. As all regions are in a
|
||||
// consecutive range, it simply returns a slice of numbers in increasing order.
|
||||
// The "undefined" region is not returned.
|
||||
func (s allSubtags) Regions() []Region {
|
||||
reg := make([]Region, language.NumRegions)
|
||||
for i := range reg {
|
||||
reg[i] = Region{language.Region(i + 1)}
|
||||
}
|
||||
return reg
|
||||
}
|
||||
|
||||
// Scripts returns the list of supported scripts. As all scripts are in a
|
||||
// consecutive range, it simply returns a slice of numbers in increasing order.
|
||||
// The "undefined" script is not returned.
|
||||
func (s allSubtags) Scripts() []Script {
|
||||
scr := make([]Script, language.NumScripts)
|
||||
for i := range scr {
|
||||
scr[i] = Script{language.Script(i + 1)}
|
||||
}
|
||||
return scr
|
||||
}
|
||||
|
||||
// BaseLanguages returns the list of all supported base languages. It generates
|
||||
// the list by traversing the internal structures.
|
||||
func (s allSubtags) BaseLanguages() []Base {
|
||||
bs := language.BaseLanguages()
|
||||
base := make([]Base, len(bs))
|
||||
for i, b := range bs {
|
||||
base[i] = Base{b}
|
||||
}
|
||||
return base
|
||||
}
|
||||
|
||||
// Tags always returns nil.
|
||||
func (s allSubtags) Tags() []Tag {
|
||||
return nil
|
||||
}
|
||||
|
||||
// coverage is used used by NewCoverage which is used as a convenient way for
|
||||
// creating Coverage implementations for partially defined data. Very often a
|
||||
// package will only need to define a subset of slices. coverage provides a
|
||||
// convenient way to do this. Moreover, packages using NewCoverage, instead of
|
||||
// their own implementation, will not break if later new slice types are added.
|
||||
type coverage struct {
|
||||
tags func() []Tag
|
||||
bases func() []Base
|
||||
scripts func() []Script
|
||||
regions func() []Region
|
||||
}
|
||||
|
||||
func (s *coverage) Tags() []Tag {
|
||||
if s.tags == nil {
|
||||
return nil
|
||||
}
|
||||
return s.tags()
|
||||
}
|
||||
|
||||
// bases implements sort.Interface and is used to sort base languages.
|
||||
type bases []Base
|
||||
|
||||
func (b bases) Len() int {
|
||||
return len(b)
|
||||
}
|
||||
|
||||
func (b bases) Swap(i, j int) {
|
||||
b[i], b[j] = b[j], b[i]
|
||||
}
|
||||
|
||||
func (b bases) Less(i, j int) bool {
|
||||
return b[i].langID < b[j].langID
|
||||
}
|
||||
|
||||
// BaseLanguages returns the result from calling s.bases if it is specified or
|
||||
// otherwise derives the set of supported base languages from tags.
|
||||
func (s *coverage) BaseLanguages() []Base {
|
||||
if s.bases == nil {
|
||||
tags := s.Tags()
|
||||
if len(tags) == 0 {
|
||||
return nil
|
||||
}
|
||||
a := make([]Base, len(tags))
|
||||
for i, t := range tags {
|
||||
a[i] = Base{language.Language(t.lang())}
|
||||
}
|
||||
sort.Sort(bases(a))
|
||||
k := 0
|
||||
for i := 1; i < len(a); i++ {
|
||||
if a[k] != a[i] {
|
||||
k++
|
||||
a[k] = a[i]
|
||||
}
|
||||
}
|
||||
return a[:k+1]
|
||||
}
|
||||
return s.bases()
|
||||
}
|
||||
|
||||
func (s *coverage) Scripts() []Script {
|
||||
if s.scripts == nil {
|
||||
return nil
|
||||
}
|
||||
return s.scripts()
|
||||
}
|
||||
|
||||
func (s *coverage) Regions() []Region {
|
||||
if s.regions == nil {
|
||||
return nil
|
||||
}
|
||||
return s.regions()
|
||||
}
|
||||
|
||||
// NewCoverage returns a Coverage for the given lists. It is typically used by
|
||||
// packages providing internationalization services to define their level of
|
||||
// coverage. A list may be of type []T or func() []T, where T is either Tag,
|
||||
// Base, Script or Region. The returned Coverage derives the value for Bases
|
||||
// from Tags if no func or slice for []Base is specified. For other unspecified
|
||||
// types the returned Coverage will return nil for the respective methods.
|
||||
func NewCoverage(list ...interface{}) Coverage {
|
||||
s := &coverage{}
|
||||
for _, x := range list {
|
||||
switch v := x.(type) {
|
||||
case func() []Base:
|
||||
s.bases = v
|
||||
case func() []Script:
|
||||
s.scripts = v
|
||||
case func() []Region:
|
||||
s.regions = v
|
||||
case func() []Tag:
|
||||
s.tags = v
|
||||
case []Base:
|
||||
s.bases = func() []Base { return v }
|
||||
case []Script:
|
||||
s.scripts = func() []Script { return v }
|
||||
case []Region:
|
||||
s.regions = func() []Region { return v }
|
||||
case []Tag:
|
||||
s.tags = func() []Tag { return v }
|
||||
default:
|
||||
panic(fmt.Sprintf("language: unsupported set type %T", v))
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
@ -0,0 +1,102 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package language implements BCP 47 language tags and related functionality.
|
||||
//
|
||||
// The most important function of package language is to match a list of
|
||||
// user-preferred languages to a list of supported languages.
|
||||
// It alleviates the developer of dealing with the complexity of this process
|
||||
// and provides the user with the best experience
|
||||
// (see https://blog.golang.org/matchlang).
|
||||
//
|
||||
//
|
||||
// Matching preferred against supported languages
|
||||
//
|
||||
// A Matcher for an application that supports English, Australian English,
|
||||
// Danish, and standard Mandarin can be created as follows:
|
||||
//
|
||||
// var matcher = language.NewMatcher([]language.Tag{
|
||||
// language.English, // The first language is used as fallback.
|
||||
// language.MustParse("en-AU"),
|
||||
// language.Danish,
|
||||
// language.Chinese,
|
||||
// })
|
||||
//
|
||||
// This list of supported languages is typically implied by the languages for
|
||||
// which there exists translations of the user interface.
|
||||
//
|
||||
// User-preferred languages usually come as a comma-separated list of BCP 47
|
||||
// language tags.
|
||||
// The MatchStrings function finds the best matches for such strings:
|
||||
//
|
||||
// handler(w http.ResponseWriter, r *http.Request) {
|
||||
// lang, _ := r.Cookie("lang")
|
||||
// accept := r.Header.Get("Accept-Language")
|
||||
// tag, _ := language.MatchStrings(matcher, lang.String(), accept)
|
||||
//
|
||||
// // tag should now be used for the initialization of any
|
||||
// // locale-specific service.
|
||||
// }
|
||||
//
|
||||
// The Matcher's Match method can be used to match Tags directly.
|
||||
//
|
||||
// Matchers are aware of the intricacies of equivalence between languages, such
|
||||
// as deprecated subtags, legacy tags, macro languages, mutual
|
||||
// intelligibility between scripts and languages, and transparently passing
|
||||
// BCP 47 user configuration.
|
||||
// For instance, it will know that a reader of Bokmål Danish can read Norwegian
|
||||
// and will know that Cantonese ("yue") is a good match for "zh-HK".
|
||||
//
|
||||
//
|
||||
// Using match results
|
||||
//
|
||||
// To guarantee a consistent user experience to the user it is important to
|
||||
// use the same language tag for the selection of any locale-specific services.
|
||||
// For example, it is utterly confusing to substitute spelled-out numbers
|
||||
// or dates in one language in text of another language.
|
||||
// More subtly confusing is using the wrong sorting order or casing
|
||||
// algorithm for a certain language.
|
||||
//
|
||||
// All the packages in x/text that provide locale-specific services
|
||||
// (e.g. collate, cases) should be initialized with the tag that was
|
||||
// obtained at the start of an interaction with the user.
|
||||
//
|
||||
// Note that the Tag returned by Match and MatchStrings may differ from any
|
||||
// of the supported languages, as it may contain carried over settings from
|
||||
// the user tags.
|
||||
// This may be inconvenient when your application has some additional
|
||||
// locale-specific data for your supported languages.
|
||||
// Match and MatchStrings both return the index of the matched supported tag
|
||||
// to simplify associating such data with the matched tag.
|
||||
//
|
||||
//
|
||||
// Canonicalization
|
||||
//
|
||||
// If one uses the Matcher to compare languages one does not need to
|
||||
// worry about canonicalization.
|
||||
//
|
||||
// The meaning of a Tag varies per application. The language package
|
||||
// therefore delays canonicalization and preserves information as much
|
||||
// as possible. The Matcher, however, will always take into account that
|
||||
// two different tags may represent the same language.
|
||||
//
|
||||
// By default, only legacy and deprecated tags are converted into their
|
||||
// canonical equivalent. All other information is preserved. This approach makes
|
||||
// the confidence scores more accurate and allows matchers to distinguish
|
||||
// between variants that are otherwise lost.
|
||||
//
|
||||
// As a consequence, two tags that should be treated as identical according to
|
||||
// BCP 47 or CLDR, like "en-Latn" and "en", will be represented differently. The
|
||||
// Matcher handles such distinctions, though, and is aware of the
|
||||
// equivalence relations. The CanonType type can be used to alter the
|
||||
// canonicalization form.
|
||||
//
|
||||
// References
|
||||
//
|
||||
// BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47
|
||||
//
|
||||
package language // import "golang.org/x/text/language"
|
||||
|
||||
// TODO: explanation on how to match languages for your own locale-specific
|
||||
// service.
|
@ -0,0 +1,305 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Language tag table generator.
|
||||
// Data read from the web.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/language"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
// test is the -test flag: test existing tables.
var test = flag.Bool("test", false,
	"test existing tables; can be used to compare web data with package data.")

// outputFile is the -output flag: the file that receives the generated tables.
var outputFile = flag.String("output", "tables.go",
	"output file for generated tables")
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "language")
|
||||
|
||||
b := newBuilder(w)
|
||||
gen.WriteCLDRVersion(w)
|
||||
|
||||
b.writeConstants()
|
||||
b.writeMatchData()
|
||||
}
|
||||
|
||||
type builder struct {
|
||||
w *gen.CodeWriter
|
||||
hw io.Writer // MultiWriter for w and w.Hash
|
||||
data *cldr.CLDR
|
||||
supp *cldr.SupplementalData
|
||||
}
|
||||
|
||||
func (b *builder) langIndex(s string) uint16 {
|
||||
return uint16(language.MustParseBase(s))
|
||||
}
|
||||
|
||||
func (b *builder) regionIndex(s string) int {
|
||||
return int(language.MustParseRegion(s))
|
||||
}
|
||||
|
||||
func (b *builder) scriptIndex(s string) int {
|
||||
return int(language.MustParseScript(s))
|
||||
}
|
||||
|
||||
func newBuilder(w *gen.CodeWriter) *builder {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
b := builder{
|
||||
w: w,
|
||||
hw: io.MultiWriter(w, w.Hash),
|
||||
data: data,
|
||||
supp: data.Supplemental(),
|
||||
}
|
||||
return &b
|
||||
}
|
||||
|
||||
// writeConsts computes f(v) for all v in values and writes the results
|
||||
// as constants named _v to a single constant block.
|
||||
func (b *builder) writeConsts(f func(string) int, values ...string) {
|
||||
fmt.Fprintln(b.w, "const (")
|
||||
for _, v := range values {
|
||||
fmt.Fprintf(b.w, "\t_%s = %v\n", v, f(v))
|
||||
}
|
||||
fmt.Fprintln(b.w, ")")
|
||||
}
|
||||
|
||||
// TODO: region inclusion data will probably not be used in future matchers.
|
||||
|
||||
// The lists below name the subtags for which writeConstants emits a
// constant of the form _<subtag>.
var (
	// langConsts lists base languages.
	langConsts = []string{
		"de", "en", "fr", "it", "mo", "no", "nb", "pt", "sh", "mul", "und",
	}

	// scriptConsts lists scripts.
	scriptConsts = []string{
		"Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy",
		"Zzzz",
	}

	// regionConsts lists regions.
	regionConsts = []string{
		"001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US",
		"ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo.
	}
)
|
||||
|
||||
func (b *builder) writeConstants() {
|
||||
b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
|
||||
b.writeConsts(b.regionIndex, regionConsts...)
|
||||
b.writeConsts(b.scriptIndex, scriptConsts...)
|
||||
}
|
||||
|
||||
// Entry types of the tables written by writeMatchData.
type (
	// mutualIntelligibility is an entry of the matchLang table: a pair of
	// base languages with a distance.
	mutualIntelligibility struct {
		want, have uint16
		distance   uint8
		oneway     bool
	}

	// scriptIntelligibility is an entry of the matchScript table: a pair of
	// language/script combinations with a distance.
	scriptIntelligibility struct {
		wantLang, haveLang     uint16
		wantScript, haveScript uint8
		distance               uint8
		// Always oneway
	}

	// regionIntelligibility is an entry of the matchRegion table.
	regionIntelligibility struct {
		lang     uint16 // compact language id
		script   uint8  // 0 means any
		group    uint8  // 0 means any; if bit 7 is set it means inverse
		distance uint8
		// Always twoway.
	}
)
|
||||
|
||||
// writeMatchData writes tables with languages and scripts for which there is
|
||||
// mutual intelligibility. The data is based on CLDR's languageMatching data.
|
||||
// Note that we use a different algorithm than the one defined by CLDR and that
|
||||
// we slightly modify the data. For example, we convert scores to confidence levels.
|
||||
// We also drop all region-related data as we use a different algorithm to
|
||||
// determine region equivalence.
|
||||
func (b *builder) writeMatchData() {
|
||||
lm := b.supp.LanguageMatching.LanguageMatches
|
||||
cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new")
|
||||
|
||||
regionHierarchy := map[string][]string{}
|
||||
for _, g := range b.supp.TerritoryContainment.Group {
|
||||
regions := strings.Split(g.Contains, " ")
|
||||
regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...)
|
||||
}
|
||||
regionToGroups := make([]uint8, language.NumRegions)
|
||||
|
||||
idToIndex := map[string]uint8{}
|
||||
for i, mv := range lm[0].MatchVariable {
|
||||
if i > 6 {
|
||||
log.Fatalf("Too many groups: %d", i)
|
||||
}
|
||||
idToIndex[mv.Id] = uint8(i + 1)
|
||||
// TODO: also handle '-'
|
||||
for _, r := range strings.Split(mv.Value, "+") {
|
||||
todo := []string{r}
|
||||
for k := 0; k < len(todo); k++ {
|
||||
r := todo[k]
|
||||
regionToGroups[b.regionIndex(r)] |= 1 << uint8(i)
|
||||
todo = append(todo, regionHierarchy[r]...)
|
||||
}
|
||||
}
|
||||
}
|
||||
b.w.WriteVar("regionToGroups", regionToGroups)
|
||||
|
||||
// maps language id to in- and out-of-group region.
|
||||
paradigmLocales := [][3]uint16{}
|
||||
locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ")
|
||||
for i := 0; i < len(locales); i += 2 {
|
||||
x := [3]uint16{}
|
||||
for j := 0; j < 2; j++ {
|
||||
pc := strings.SplitN(locales[i+j], "-", 2)
|
||||
x[0] = b.langIndex(pc[0])
|
||||
if len(pc) == 2 {
|
||||
x[1+j] = uint16(b.regionIndex(pc[1]))
|
||||
}
|
||||
}
|
||||
paradigmLocales = append(paradigmLocales, x)
|
||||
}
|
||||
b.w.WriteVar("paradigmLocales", paradigmLocales)
|
||||
|
||||
b.w.WriteType(mutualIntelligibility{})
|
||||
b.w.WriteType(scriptIntelligibility{})
|
||||
b.w.WriteType(regionIntelligibility{})
|
||||
|
||||
matchLang := []mutualIntelligibility{}
|
||||
matchScript := []scriptIntelligibility{}
|
||||
matchRegion := []regionIntelligibility{}
|
||||
// Convert the languageMatch entries in lists keyed by desired language.
|
||||
for _, m := range lm[0].LanguageMatch {
|
||||
// Different versions of CLDR use different separators.
|
||||
desired := strings.Replace(m.Desired, "-", "_", -1)
|
||||
supported := strings.Replace(m.Supported, "-", "_", -1)
|
||||
d := strings.Split(desired, "_")
|
||||
s := strings.Split(supported, "_")
|
||||
if len(d) != len(s) {
|
||||
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
|
||||
continue
|
||||
}
|
||||
distance, _ := strconv.ParseInt(m.Distance, 10, 8)
|
||||
switch len(d) {
|
||||
case 2:
|
||||
if desired == supported && desired == "*_*" {
|
||||
continue
|
||||
}
|
||||
// language-script pair.
|
||||
matchScript = append(matchScript, scriptIntelligibility{
|
||||
wantLang: uint16(b.langIndex(d[0])),
|
||||
haveLang: uint16(b.langIndex(s[0])),
|
||||
wantScript: uint8(b.scriptIndex(d[1])),
|
||||
haveScript: uint8(b.scriptIndex(s[1])),
|
||||
distance: uint8(distance),
|
||||
})
|
||||
if m.Oneway != "true" {
|
||||
matchScript = append(matchScript, scriptIntelligibility{
|
||||
wantLang: uint16(b.langIndex(s[0])),
|
||||
haveLang: uint16(b.langIndex(d[0])),
|
||||
wantScript: uint8(b.scriptIndex(s[1])),
|
||||
haveScript: uint8(b.scriptIndex(d[1])),
|
||||
distance: uint8(distance),
|
||||
})
|
||||
}
|
||||
case 1:
|
||||
if desired == supported && desired == "*" {
|
||||
continue
|
||||
}
|
||||
if distance == 1 {
|
||||
// nb == no is already handled by macro mapping. Check there
|
||||
// really is only this case.
|
||||
if d[0] != "no" || s[0] != "nb" {
|
||||
log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
|
||||
}
|
||||
continue
|
||||
}
|
||||
// TODO: consider dropping oneway field and just doubling the entry.
|
||||
matchLang = append(matchLang, mutualIntelligibility{
|
||||
want: uint16(b.langIndex(d[0])),
|
||||
have: uint16(b.langIndex(s[0])),
|
||||
distance: uint8(distance),
|
||||
oneway: m.Oneway == "true",
|
||||
})
|
||||
case 3:
|
||||
if desired == supported && desired == "*_*_*" {
|
||||
continue
|
||||
}
|
||||
if desired != supported {
|
||||
// This is now supported by CLDR, but only one case, which
|
||||
// should already be covered by paradigm locales. For instance,
|
||||
// test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in
|
||||
// testdata/CLDRLocaleMatcherTest.txt tests this.
|
||||
if supported != "en_*_GB" {
|
||||
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
|
||||
}
|
||||
continue
|
||||
}
|
||||
ri := regionIntelligibility{
|
||||
lang: b.langIndex(d[0]),
|
||||
distance: uint8(distance),
|
||||
}
|
||||
if d[1] != "*" {
|
||||
ri.script = uint8(b.scriptIndex(d[1]))
|
||||
}
|
||||
switch {
|
||||
case d[2] == "*":
|
||||
ri.group = 0x80 // not contained in anything
|
||||
case strings.HasPrefix(d[2], "$!"):
|
||||
ri.group = 0x80
|
||||
d[2] = "$" + d[2][len("$!"):]
|
||||
fallthrough
|
||||
case strings.HasPrefix(d[2], "$"):
|
||||
ri.group |= idToIndex[d[2]]
|
||||
}
|
||||
matchRegion = append(matchRegion, ri)
|
||||
default:
|
||||
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
|
||||
}
|
||||
}
|
||||
sort.SliceStable(matchLang, func(i, j int) bool {
|
||||
return matchLang[i].distance < matchLang[j].distance
|
||||
})
|
||||
b.w.WriteComment(`
|
||||
matchLang holds pairs of langIDs of base languages that are typically
|
||||
mutually intelligible. Each pair is associated with a confidence and
|
||||
whether the intelligibility goes one or both ways.`)
|
||||
b.w.WriteVar("matchLang", matchLang)
|
||||
|
||||
b.w.WriteComment(`
|
||||
matchScript holds pairs of scriptIDs where readers of one script
|
||||
can typically also read the other. Each is associated with a confidence.`)
|
||||
sort.SliceStable(matchScript, func(i, j int) bool {
|
||||
return matchScript[i].distance < matchScript[j].distance
|
||||
})
|
||||
b.w.WriteVar("matchScript", matchScript)
|
||||
|
||||
sort.SliceStable(matchRegion, func(i, j int) bool {
|
||||
return matchRegion[i].distance < matchRegion[j].distance
|
||||
})
|
||||
b.w.WriteVar("matchRegion", matchRegion)
|
||||
}
|
@ -0,0 +1,38 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !go1.2
|
||||
|
||||
package language
|
||||
|
||||
import "sort"
|
||||
|
||||
// sortStable sorts s while keeping elements that compare equal in their
// original relative order. It does so by running sort.Sort on a wrapper that
// breaks ties using each element's original position.
func sortStable(s sort.Interface) {
	order := make([]int, s.Len())
	for i := range order {
		order[i] = i
	}
	sort.Sort(&stableSort{s: s, pos: order})
}

// stableSort wraps a sort.Interface and tracks, for every current index, the
// position the element at that index had before sorting started.
type stableSort struct {
	s   sort.Interface
	pos []int
}

// Len returns the number of elements being sorted.
func (s *stableSort) Len() int {
	return len(s.pos)
}

// Less orders by the wrapped Less and falls back to the original position
// when neither element is less than the other.
func (s *stableSort) Less(i, j int) bool {
	if s.s.Less(i, j) {
		return true
	}
	if s.s.Less(j, i) {
		return false
	}
	return s.pos[i] < s.pos[j]
}

// Swap exchanges both the wrapped elements and their recorded positions.
func (s *stableSort) Swap(i, j int) {
	s.pos[i], s.pos[j] = s.pos[j], s.pos[i]
	s.s.Swap(i, j)
}
|
@ -0,0 +1,11 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build go1.2
|
||||
|
||||
package language
|
||||
|
||||
import "sort"
|
||||
|
||||
// sortStable is the standard library's stable sort.
var sortStable func(sort.Interface) = sort.Stable
|
@ -0,0 +1,596 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go -output tables.go
|
||||
|
||||
package language
|
||||
|
||||
// TODO: Remove above NOTE after:
|
||||
// - verifying that tables are dropped correctly (most notably matcher tables).
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
"golang.org/x/text/internal/language/compact"
|
||||
)
|
||||
|
||||
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
|
||||
// specific language or locale. All language tag values are guaranteed to be
|
||||
// well-formed.
|
||||
type Tag compact.Tag
|
||||
|
||||
func makeTag(t language.Tag) (tag Tag) {
|
||||
return Tag(compact.Make(t))
|
||||
}
|
||||
|
||||
func (t *Tag) tag() language.Tag {
|
||||
return (*compact.Tag)(t).Tag()
|
||||
}
|
||||
|
||||
func (t *Tag) isCompact() bool {
|
||||
return (*compact.Tag)(t).IsCompact()
|
||||
}
|
||||
|
||||
// TODO: improve performance.
|
||||
func (t *Tag) lang() language.Language { return t.tag().LangID }
|
||||
func (t *Tag) region() language.Region { return t.tag().RegionID }
|
||||
func (t *Tag) script() language.Script { return t.tag().ScriptID }
|
||||
|
||||
// Make is a convenience wrapper for Parse that omits the error.
|
||||
// In case of an error, a sensible default is returned.
|
||||
func Make(s string) Tag {
|
||||
return Default.Make(s)
|
||||
}
|
||||
|
||||
// Make is a convenience wrapper for c.Parse that omits the error.
|
||||
// In case of an error, a sensible default is returned.
|
||||
func (c CanonType) Make(s string) Tag {
|
||||
t, _ := c.Parse(s)
|
||||
return t
|
||||
}
|
||||
|
||||
// Raw returns the raw base language, script and region, without making an
|
||||
// attempt to infer their values.
|
||||
func (t Tag) Raw() (b Base, s Script, r Region) {
|
||||
tt := t.tag()
|
||||
return Base{tt.LangID}, Script{tt.ScriptID}, Region{tt.RegionID}
|
||||
}
|
||||
|
||||
// IsRoot returns true if t is equal to language "und".
|
||||
func (t Tag) IsRoot() bool {
|
||||
return compact.Tag(t).IsRoot()
|
||||
}
|
||||
|
||||
// CanonType can be used to enable or disable various types of canonicalization.
// The individual types are bit flags and may be combined with |.
type CanonType int

const (
	// DeprecatedBase replaces deprecated base languages with their
	// preferred replacements.
	DeprecatedBase CanonType = 1 << iota
	// DeprecatedScript replaces deprecated scripts with their preferred
	// replacements.
	DeprecatedScript
	// DeprecatedRegion replaces deprecated regions with their preferred
	// replacements.
	DeprecatedRegion
	// SuppressScript removes redundant scripts.
	SuppressScript
	// Legacy normalizes legacy encodings. This includes legacy languages
	// defined in CLDR as well as bibliographic codes defined in ISO-639.
	Legacy
	// Macro maps the dominant language of a macro language group to the
	// macro language subtag. For example cmn -> zh.
	Macro
	// CLDR should be used if full compatibility with CLDR is required.
	// There are a few cases where language.Tag may differ from CLDR. To
	// follow all of CLDR's suggestions, use All|CLDR.
	CLDR

	// Raw can be used to Compose or Parse without Canonicalization.
	Raw CanonType = 0

	// Deprecated replaces all deprecated tags with their preferred
	// replacements.
	Deprecated = DeprecatedBase | DeprecatedScript | DeprecatedRegion

	// BCP47 selects all canonicalizations recommended by BCP 47.
	BCP47 = Deprecated | SuppressScript

	// All selects all canonicalizations.
	All = BCP47 | Legacy | Macro

	// Default is the canonicalization used by Parse, Make and Compose. To
	// preserve as much information as possible, canonicalizations that remove
	// potentially valuable information are not included. The Matcher is
	// designed to recognize similar tags that would be the same if
	// they were canonicalized using All.
	Default = Deprecated | Legacy

	// canonLang combines the flags that affect the base language.
	canonLang = DeprecatedBase | Legacy | Macro

	// TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
)
|
||||
|
||||
// canonicalize returns the canonicalized equivalent of the tag and
|
||||
// whether there was any change.
|
||||
func canonicalize(c CanonType, t language.Tag) (language.Tag, bool) {
|
||||
if c == Raw {
|
||||
return t, false
|
||||
}
|
||||
changed := false
|
||||
if c&SuppressScript != 0 {
|
||||
if t.LangID.SuppressScript() == t.ScriptID {
|
||||
t.ScriptID = 0
|
||||
changed = true
|
||||
}
|
||||
}
|
||||
if c&canonLang != 0 {
|
||||
for {
|
||||
if l, aliasType := t.LangID.Canonicalize(); l != t.LangID {
|
||||
switch aliasType {
|
||||
case language.Legacy:
|
||||
if c&Legacy != 0 {
|
||||
if t.LangID == _sh && t.ScriptID == 0 {
|
||||
t.ScriptID = _Latn
|
||||
}
|
||||
t.LangID = l
|
||||
changed = true
|
||||
}
|
||||
case language.Macro:
|
||||
if c&Macro != 0 {
|
||||
// We deviate here from CLDR. The mapping "nb" -> "no"
|
||||
// qualifies as a typical Macro language mapping. However,
|
||||
// for legacy reasons, CLDR maps "no", the macro language
|
||||
// code for Norwegian, to the dominant variant "nb". This
|
||||
// change is currently under consideration for CLDR as well.
|
||||
// See https://unicode.org/cldr/trac/ticket/2698 and also
|
||||
// https://unicode.org/cldr/trac/ticket/1790 for some of the
|
||||
// practical implications. TODO: this check could be removed
|
||||
// if CLDR adopts this change.
|
||||
if c&CLDR == 0 || t.LangID != _nb {
|
||||
changed = true
|
||||
t.LangID = l
|
||||
}
|
||||
}
|
||||
case language.Deprecated:
|
||||
if c&DeprecatedBase != 0 {
|
||||
if t.LangID == _mo && t.RegionID == 0 {
|
||||
t.RegionID = _MD
|
||||
}
|
||||
t.LangID = l
|
||||
changed = true
|
||||
// Other canonicalization types may still apply.
|
||||
continue
|
||||
}
|
||||
}
|
||||
} else if c&Legacy != 0 && t.LangID == _no && c&CLDR != 0 {
|
||||
t.LangID = _nb
|
||||
changed = true
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
if c&DeprecatedScript != 0 {
|
||||
if t.ScriptID == _Qaai {
|
||||
changed = true
|
||||
t.ScriptID = _Zinh
|
||||
}
|
||||
}
|
||||
if c&DeprecatedRegion != 0 {
|
||||
if r := t.RegionID.Canonicalize(); r != t.RegionID {
|
||||
changed = true
|
||||
t.RegionID = r
|
||||
}
|
||||
}
|
||||
return t, changed
|
||||
}
|
||||
|
||||
// Canonicalize returns the canonicalized equivalent of the tag.
|
||||
func (c CanonType) Canonicalize(t Tag) (Tag, error) {
|
||||
// First try fast path.
|
||||
if t.isCompact() {
|
||||
if _, changed := canonicalize(c, compact.Tag(t).Tag()); !changed {
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
// It is unlikely that one will canonicalize a tag after matching. So do
|
||||
// a slow but simple approach here.
|
||||
if tag, changed := canonicalize(c, t.tag()); changed {
|
||||
tag.RemakeString()
|
||||
return makeTag(tag), nil
|
||||
}
|
||||
return t, nil
|
||||
|
||||
}
|
||||
|
||||
// Confidence indicates the level of certainty for a given return value.
// For example, Serbian may be written in Cyrillic or Latin script.
// The confidence level indicates whether a value was explicitly specified,
// whether it is typically the only possible value, or whether there is
// an ambiguity.
type Confidence int

const (
	No    Confidence = iota // full confidence that there was no match
	Low                     // most likely value picked out of a set of alternatives
	High                    // value is generally assumed to be the correct match
	Exact                   // exact match or explicitly specified value
)

// confName holds the names of the Confidence values, indexed by value.
var confName = []string{"No", "Low", "High", "Exact"}

// String returns the name of the confidence level ("No", "Low", "High" or
// "Exact"). For a value outside the defined range it returns "Unknown"
// instead of panicking on the table lookup.
func (c Confidence) String() string {
	if c < 0 || int(c) >= len(confName) {
		return "Unknown"
	}
	return confName[c]
}
|
||||
|
||||
// String returns the canonical string representation of the language tag.
|
||||
func (t Tag) String() string {
|
||||
return t.tag().String()
|
||||
}
|
||||
|
||||
// MarshalText implements encoding.TextMarshaler.
|
||||
func (t Tag) MarshalText() (text []byte, err error) {
|
||||
return t.tag().MarshalText()
|
||||
}
|
||||
|
||||
// UnmarshalText implements encoding.TextUnmarshaler.
|
||||
func (t *Tag) UnmarshalText(text []byte) error {
|
||||
var tag language.Tag
|
||||
err := tag.UnmarshalText(text)
|
||||
*t = makeTag(tag)
|
||||
return err
|
||||
}
|
||||
|
||||
// Base returns the base language of the language tag. If the base language is
|
||||
// unspecified, an attempt will be made to infer it from the context.
|
||||
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||
func (t Tag) Base() (Base, Confidence) {
|
||||
if b := t.lang(); b != 0 {
|
||||
return Base{b}, Exact
|
||||
}
|
||||
tt := t.tag()
|
||||
c := High
|
||||
if tt.ScriptID == 0 && !tt.RegionID.IsCountry() {
|
||||
c = Low
|
||||
}
|
||||
if tag, err := tt.Maximize(); err == nil && tag.LangID != 0 {
|
||||
return Base{tag.LangID}, c
|
||||
}
|
||||
return Base{0}, No
|
||||
}
|
||||
|
||||
// Script infers the script for the language tag. If it was not explicitly given, it will infer
|
||||
// a most likely candidate.
|
||||
// If more than one script is commonly used for a language, the most likely one
|
||||
// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
|
||||
// for Serbian.
|
||||
// If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
|
||||
// as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
|
||||
// common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
|
||||
// See https://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
|
||||
// unknown value in CLDR. (Zzzz, Exact) is returned if Zzzz was explicitly specified.
|
||||
// Note that an inferred script is never guaranteed to be the correct one. Latin is
|
||||
// almost exclusively used for Afrikaans, but Arabic has been used for some texts
|
||||
// in the past. Also, the script that is commonly used may change over time.
|
||||
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||
func (t Tag) Script() (Script, Confidence) {
|
||||
if scr := t.script(); scr != 0 {
|
||||
return Script{scr}, Exact
|
||||
}
|
||||
tt := t.tag()
|
||||
sc, c := language.Script(_Zzzz), No
|
||||
if scr := tt.LangID.SuppressScript(); scr != 0 {
|
||||
// Note: it is not always the case that a language with a suppress
|
||||
// script value is only written in one script (e.g. kk, ms, pa).
|
||||
if tt.RegionID == 0 {
|
||||
return Script{scr}, High
|
||||
}
|
||||
sc, c = scr, High
|
||||
}
|
||||
if tag, err := tt.Maximize(); err == nil {
|
||||
if tag.ScriptID != sc {
|
||||
sc, c = tag.ScriptID, Low
|
||||
}
|
||||
} else {
|
||||
tt, _ = canonicalize(Deprecated|Macro, tt)
|
||||
if tag, err := tt.Maximize(); err == nil && tag.ScriptID != sc {
|
||||
sc, c = tag.ScriptID, Low
|
||||
}
|
||||
}
|
||||
return Script{sc}, c
|
||||
}
|
||||
|
||||
// Region returns the region for the language tag. If it was not explicitly given, it will
|
||||
// infer a most likely candidate from the context.
|
||||
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||
func (t Tag) Region() (Region, Confidence) {
|
||||
if r := t.region(); r != 0 {
|
||||
return Region{r}, Exact
|
||||
}
|
||||
tt := t.tag()
|
||||
if tt, err := tt.Maximize(); err == nil {
|
||||
return Region{tt.RegionID}, Low // TODO: differentiate between high and low.
|
||||
}
|
||||
tt, _ = canonicalize(Deprecated|Macro, tt)
|
||||
if tag, err := tt.Maximize(); err == nil {
|
||||
return Region{tag.RegionID}, Low
|
||||
}
|
||||
return Region{_ZZ}, No // TODO: return world instead of undetermined?
|
||||
}
|
||||
|
||||
// Variants returns the variants specified explicitly for this language tag.
|
||||
// or nil if no variant was specified.
|
||||
func (t Tag) Variants() []Variant {
|
||||
if !compact.Tag(t).MayHaveVariants() {
|
||||
return nil
|
||||
}
|
||||
v := []Variant{}
|
||||
x, str := "", t.tag().Variants()
|
||||
for str != "" {
|
||||
x, str = nextToken(str)
|
||||
v = append(v, Variant{x})
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
|
||||
// specific language are substituted with fields from the parent language.
|
||||
// The parent for a language may change for newer versions of CLDR.
|
||||
func (t Tag) Parent() Tag {
|
||||
return Tag(compact.Tag(t).Parent())
|
||||
}
|
||||
|
||||
// nextToken splits the first token off s. The first byte of s is skipped
// unconditionally (callers pass strings of the form "-tok-tok..."), so s must
// not be empty. It returns the text up to the next '-' as t and the
// remainder, still starting with that '-', as tail. When there is no further
// '-', tail is empty.
func nextToken(s string) (t, tail string) {
	rest := s[1:]
	if i := strings.IndexByte(rest, '-'); i >= 0 {
		return rest[:i], rest[i:]
	}
	return rest, ""
}
|
||||
|
||||
// Extension is a single BCP 47 extension, held in its string form.
|
||||
type Extension struct {
|
||||
// s is the extension text. String returns it unchanged, Type reports its
// first byte and Tokens splits it on "-".
s string
|
||||
}
|
||||
|
||||
// String returns the string representation of the extension, including the
|
||||
// type tag.
|
||||
func (e Extension) String() string {
|
||||
return e.s
|
||||
}
|
||||
|
||||
// ParseExtension parses s as an extension and returns it on success.
|
||||
func ParseExtension(s string) (e Extension, err error) {
|
||||
ext, err := language.ParseExtension(s)
|
||||
return Extension{ext}, err
|
||||
}
|
||||
|
||||
// Type returns the one-byte extension type of e. It returns 0 for the zero
|
||||
// exception.
|
||||
func (e Extension) Type() byte {
|
||||
if e.s == "" {
|
||||
return 0
|
||||
}
|
||||
return e.s[0]
|
||||
}
|
||||
|
||||
// Tokens returns the list of tokens of e.
|
||||
func (e Extension) Tokens() []string {
|
||||
return strings.Split(e.s, "-")
|
||||
}
|
||||
|
||||
// Extension returns the extension of type x for tag t. It will return
|
||||
// false for ok if t does not have the requested extension. The returned
|
||||
// extension will be invalid in this case.
|
||||
func (t Tag) Extension(x byte) (ext Extension, ok bool) {
|
||||
if !compact.Tag(t).MayHaveExtensions() {
|
||||
return Extension{}, false
|
||||
}
|
||||
e, ok := t.tag().Extension(x)
|
||||
return Extension{e}, ok
|
||||
}
|
||||
|
||||
// Extensions returns all extensions of t.
|
||||
func (t Tag) Extensions() []Extension {
|
||||
if !compact.Tag(t).MayHaveExtensions() {
|
||||
return nil
|
||||
}
|
||||
e := []Extension{}
|
||||
for _, ext := range t.tag().Extensions() {
|
||||
e = append(e, Extension{ext})
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// TypeForKey returns the type associated with the given key, where key and type
|
||||
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// TypeForKey will traverse the inheritance chain to get the correct value.
|
||||
func (t Tag) TypeForKey(key string) string {
|
||||
if !compact.Tag(t).MayHaveExtensions() {
|
||||
if key != "rg" && key != "va" {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
return t.tag().TypeForKey(key)
|
||||
}
|
||||
|
||||
// SetTypeForKey returns a new Tag with the key set to type, where key and type
|
||||
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// An empty value removes an existing pair with the same key.
|
||||
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
|
||||
tt, err := t.tag().SetTypeForKey(key, value)
|
||||
return makeTag(tt), err
|
||||
}
|
||||
|
||||
// NumCompactTags is the number of compact tags. The maximum tag is
|
||||
// NumCompactTags-1. It re-exports the constant of the same name from the
// internal compact package.
|
||||
const NumCompactTags = compact.NumCompactTags
|
||||
|
||||
// CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
|
||||
// for which data exists in the text repository.The index will change over time
|
||||
// and should not be stored in persistent storage. If t does not match a compact
|
||||
// index, exact will be false and the compact index will be returned for the
|
||||
// first match after repeatedly taking the Parent of t.
|
||||
func CompactIndex(t Tag) (index int, exact bool) {
|
||||
id, exact := compact.LanguageID(compact.Tag(t))
|
||||
return int(id), exact
|
||||
}
|
||||
|
||||
// root is the zero value of the internal language tag.
var root language.Tag
|
||||
|
||||
// Base is an ISO 639 language code, used for encoding the base language
|
||||
// of a language tag.
|
||||
type Base struct {
|
||||
// langID is the internal language identifier that String, ISO3 and
// IsPrivateUse delegate to.
langID language.Language
|
||||
}
|
||||
|
||||
// ParseBase parses a 2- or 3-letter ISO 639 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown language identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseBase(s string) (Base, error) {
|
||||
l, err := language.ParseBase(s)
|
||||
return Base{l}, err
|
||||
}
|
||||
|
||||
// String returns the BCP 47 representation of the base language.
|
||||
func (b Base) String() string {
|
||||
return b.langID.String()
|
||||
}
|
||||
|
||||
// ISO3 returns the ISO 639-3 language code.
|
||||
func (b Base) ISO3() string {
|
||||
return b.langID.ISO3()
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether this language code is reserved for private use.
|
||||
func (b Base) IsPrivateUse() bool {
|
||||
return b.langID.IsPrivateUse()
|
||||
}
|
||||
|
||||
// Script is a 4-letter ISO 15924 code for representing scripts.
|
||||
// It is idiomatically represented in title case.
|
||||
type Script struct {
|
||||
// scriptID is the internal script identifier that String and IsPrivateUse
// delegate to.
scriptID language.Script
|
||||
}
|
||||
|
||||
// ParseScript parses a 4-letter ISO 15924 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown script identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseScript(s string) (Script, error) {
|
||||
sc, err := language.ParseScript(s)
|
||||
return Script{sc}, err
|
||||
}
|
||||
|
||||
// String returns the script code in title case.
|
||||
// It returns "Zzzz" for an unspecified script.
|
||||
func (s Script) String() string {
|
||||
return s.scriptID.String()
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether this script code is reserved for private use.
|
||||
func (s Script) IsPrivateUse() bool {
|
||||
return s.scriptID.IsPrivateUse()
|
||||
}
|
||||
|
||||
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
|
||||
type Region struct {
|
||||
// regionID is the internal region identifier that all Region methods
// delegate to.
regionID language.Region
|
||||
}
|
||||
|
||||
// EncodeM49 returns the Region for the given UN M.49 code.
|
||||
// It returns an error if r is not a valid code.
|
||||
func EncodeM49(r int) (Region, error) {
|
||||
rid, err := language.EncodeM49(r)
|
||||
return Region{rid}, err
|
||||
}
|
||||
|
||||
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown region identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseRegion(s string) (Region, error) {
|
||||
r, err := language.ParseRegion(s)
|
||||
return Region{r}, err
|
||||
}
|
||||
|
||||
// String returns the BCP 47 representation for the region.
|
||||
// It returns "ZZ" for an unspecified region.
|
||||
func (r Region) String() string {
|
||||
return r.regionID.String()
|
||||
}
|
||||
|
||||
// ISO3 returns the 3-letter ISO code of r.
|
||||
// Note that not all regions have a 3-letter ISO code.
|
||||
// In such cases this method returns "ZZZ".
|
||||
func (r Region) ISO3() string {
|
||||
return r.regionID.String()
|
||||
}
|
||||
|
||||
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
|
||||
// is not defined for r.
|
||||
func (r Region) M49() int {
|
||||
return r.regionID.M49()
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
|
||||
// may include private-use tags that are assigned by CLDR and used in this
|
||||
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
|
||||
func (r Region) IsPrivateUse() bool {
|
||||
return r.regionID.IsPrivateUse()
|
||||
}
|
||||
|
||||
// IsCountry returns whether this region is a country or autonomous area. This
|
||||
// includes non-standard definitions from CLDR.
|
||||
func (r Region) IsCountry() bool {
|
||||
return r.regionID.IsCountry()
|
||||
}
|
||||
|
||||
// IsGroup returns whether this region defines a collection of regions. This
|
||||
// includes non-standard definitions from CLDR.
|
||||
func (r Region) IsGroup() bool {
|
||||
return r.regionID.IsGroup()
|
||||
}
|
||||
|
||||
// Contains returns whether Region c is contained by Region r. It returns true
|
||||
// if c == r.
|
||||
func (r Region) Contains(c Region) bool {
|
||||
return r.regionID.Contains(c.regionID)
|
||||
}
|
||||
|
||||
// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
|
||||
// In all other cases it returns either the region itself or an error.
|
||||
//
|
||||
// This method may return an error for a region for which there exists a
|
||||
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
|
||||
// region will already be canonicalized it was obtained from a Tag that was
|
||||
// obtained using any of the default methods.
|
||||
func (r Region) TLD() (Region, error) {
|
||||
tld, err := r.regionID.TLD()
|
||||
return Region{tld}, err
|
||||
}
|
||||
|
||||
// Canonicalize returns the region or a possible replacement if the region is
|
||||
// deprecated. It will not return a replacement for deprecated regions that
|
||||
// are split into multiple regions.
|
||||
func (r Region) Canonicalize() Region {
|
||||
return Region{r.regionID.Canonicalize()}
|
||||
}
|
||||
|
||||
// Variant represents a registered variant of a language as defined by BCP 47.
|
||||
type Variant struct {
|
||||
// variant is the variant subtag text; String returns it unchanged.
variant string
|
||||
}
|
||||
|
||||
// ParseVariant parses and returns a Variant. An error is returned if s is not
|
||||
// a valid variant.
|
||||
func ParseVariant(s string) (Variant, error) {
|
||||
v, err := language.ParseVariant(s)
|
||||
return Variant{v.String()}, err
|
||||
}
|
||||
|
||||
// String returns the string representation of the variant.
|
||||
func (v Variant) String() string {
|
||||
return v.variant
|
||||
}
|
@ -0,0 +1,735 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// A MatchOption configures a Matcher. Options are functions that mutate the
// matcher under construction; newMatcher applies them in the order given,
// before the supported tags are indexed.
|
||||
type MatchOption func(*matcher)
|
||||
|
||||
// PreferSameScript will, in the absence of a match, result in the first
|
||||
// preferred tag with the same script as a supported tag to match this supported
|
||||
// tag. The default is currently true, but this may change in the future.
|
||||
func PreferSameScript(preferSame bool) MatchOption {
|
||||
return func(m *matcher) { m.preferSameScript = preferSame }
|
||||
}
|
||||
|
||||
// TODO(v1.0.0): consider making Matcher a concrete type, instead of interface.
|
||||
// There doesn't seem to be too much need for multiple types.
|
||||
// Making it a concrete type allows MatchStrings to be a method, which will
|
||||
// improve its discoverability.
|
||||
|
||||
// MatchStrings parses and matches the given strings until one of them matches
|
||||
// the language in the Matcher. A string may be an Accept-Language header as
|
||||
// handled by ParseAcceptLanguage. The default language is returned if no
|
||||
// other language matched.
|
||||
func MatchStrings(m Matcher, lang ...string) (tag Tag, index int) {
|
||||
for _, accept := range lang {
|
||||
desired, _, err := ParseAcceptLanguage(accept)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if tag, index, conf := m.Match(desired...); conf != No {
|
||||
return tag, index
|
||||
}
|
||||
}
|
||||
tag, index, _ = m.Match()
|
||||
return
|
||||
}
|
||||
|
||||
// Matcher is the interface that wraps the Match method.
|
||||
//
|
||||
// Match returns the best match for any of the given tags, along with
|
||||
// a unique index associated with the returned tag and a confidence
|
||||
// score. The tags are given in order of preference, most preferred first.
|
||||
type Matcher interface {
|
||||
Match(t ...Tag) (tag Tag, index int, c Confidence)
|
||||
}
|
||||
|
||||
// Comprehends reports the confidence score for a speaker of a given language
|
||||
// to being able to comprehend the written form of an alternative language.
|
||||
func Comprehends(speaker, alternative Tag) Confidence {
|
||||
_, _, c := NewMatcher([]Tag{alternative}).Match(speaker)
|
||||
return c
|
||||
}
|
||||
|
||||
// NewMatcher returns a Matcher that matches an ordered list of preferred tags
|
||||
// against a list of supported tags based on written intelligibility, closeness
|
||||
// of dialect, equivalence of subtags and various other rules. It is initialized
|
||||
// with the list of supported tags. The first element is used as the default
|
||||
// value in case no match is found.
|
||||
//
|
||||
// Its Match method matches the first of the given Tags to reach a certain
|
||||
// confidence threshold. The tags passed to Match should therefore be specified
|
||||
// in order of preference. Extensions are ignored for matching.
|
||||
//
|
||||
// The index returned by the Match method corresponds to the index of the
|
||||
// matched tag in t, but is augmented with the Unicode extension ('u')of the
|
||||
// corresponding preferred tag. This allows user locale options to be passed
|
||||
// transparently.
|
||||
func NewMatcher(t []Tag, options ...MatchOption) Matcher {
|
||||
return newMatcher(t, options)
|
||||
}
|
||||
|
||||
func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
|
||||
var tt language.Tag
|
||||
match, w, c := m.getBest(want...)
|
||||
if match != nil {
|
||||
tt, index = match.tag, match.index
|
||||
} else {
|
||||
// TODO: this should be an option
|
||||
tt = m.default_.tag
|
||||
if m.preferSameScript {
|
||||
outer:
|
||||
for _, w := range want {
|
||||
script, _ := w.Script()
|
||||
if script.scriptID == 0 {
|
||||
// Don't do anything if there is no script, such as with
|
||||
// private subtags.
|
||||
continue
|
||||
}
|
||||
for i, h := range m.supported {
|
||||
if script.scriptID == h.maxScript {
|
||||
tt, index = h.tag, i
|
||||
break outer
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: select first language tag based on script.
|
||||
}
|
||||
if w.RegionID != tt.RegionID && w.RegionID != 0 {
|
||||
if w.RegionID != 0 && tt.RegionID != 0 && tt.RegionID.Contains(w.RegionID) {
|
||||
tt.RegionID = w.RegionID
|
||||
tt.RemakeString()
|
||||
} else if r := w.RegionID.String(); len(r) == 2 {
|
||||
// TODO: also filter macro and deprecated.
|
||||
tt, _ = tt.SetTypeForKey("rg", strings.ToLower(r)+"zzzz")
|
||||
}
|
||||
}
|
||||
// Copy options from the user-provided tag into the result tag. This is hard
|
||||
// to do after the fact, so we do it here.
|
||||
// TODO: add in alternative variants to -u-va-.
|
||||
// TODO: add preferred region to -u-rg-.
|
||||
if e := w.Extensions(); len(e) > 0 {
|
||||
b := language.Builder{}
|
||||
b.SetTag(tt)
|
||||
for _, e := range e {
|
||||
b.AddExt(e)
|
||||
}
|
||||
tt = b.Make()
|
||||
}
|
||||
return makeTag(tt), index, c
|
||||
}
|
||||
|
||||
// ErrMissingLikelyTagsData indicates no information was available
|
||||
// to compute likely values of missing tags. It is a sentinel error value.
|
||||
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
|
||||
|
||||
// func (t *Tag) setTagsFrom(id Tag) {
|
||||
// t.LangID = id.LangID
|
||||
// t.ScriptID = id.ScriptID
|
||||
// t.RegionID = id.RegionID
|
||||
// }
|
||||
|
||||
// Tag Matching
|
||||
// CLDR defines an algorithm for finding the best match between two sets of language
|
||||
// tags. The basic algorithm defines how to score a possible match and then find
|
||||
// the match with the best score
|
||||
// (see https://www.unicode.org/reports/tr35/#LanguageMatching).
|
||||
// Using scoring has several disadvantages. The scoring obfuscates the importance of
|
||||
// the various factors considered, making the algorithm harder to understand. Using
|
||||
// scoring also requires the full score to be computed for each pair of tags.
|
||||
//
|
||||
// We will use a different algorithm which aims to have the following properties:
|
||||
// - clarity on the precedence of the various selection factors, and
|
||||
// - improved performance by allowing early termination of a comparison.
|
||||
//
|
||||
// Matching algorithm (overview)
|
||||
// Input:
|
||||
// - supported: a set of supported tags
|
||||
// - default: the default tag to return in case there is no match
|
||||
// - desired: list of desired tags, ordered by preference, starting with
|
||||
// the most-preferred.
|
||||
//
|
||||
// Algorithm:
|
||||
// 1) Set the best match to the lowest confidence level
|
||||
// 2) For each tag in "desired":
|
||||
// a) For each tag in "supported":
|
||||
// 1) compute the match between the two tags.
|
||||
// 2) if the match is better than the previous best match, replace it
|
||||
// with the new match. (see next section)
|
||||
// b) if the current best match is Exact and pin is true the result will be
|
||||
// frozen to the language found thus far, although better matches may
|
||||
// still be found for the same language.
|
||||
// 3) If the best match so far is below a certain threshold, return "default".
|
||||
//
|
||||
// Ranking:
|
||||
// We use two phases to determine whether one pair of tags are a better match
|
||||
// than another pair of tags. First, we determine a rough confidence level. If the
|
||||
// levels are different, the one with the highest confidence wins.
|
||||
// Second, if the rough confidence levels are identical, we use a set of tie-breaker
|
||||
// rules.
|
||||
//
|
||||
// The confidence level of matching a pair of tags is determined by finding the
|
||||
// lowest confidence level of any matches of the corresponding subtags (the
|
||||
// result is deemed as good as its weakest link).
|
||||
// We define the following levels:
|
||||
// Exact - An exact match of a subtag, before adding likely subtags.
|
||||
// MaxExact - An exact match of a subtag, after adding likely subtags.
|
||||
// [See Note 2].
|
||||
// High - High level of mutual intelligibility between different subtag
|
||||
// variants.
|
||||
// Low - Low level of mutual intelligibility between different subtag
|
||||
// variants.
|
||||
// No - No mutual intelligibility.
|
||||
//
|
||||
// The following levels can occur for each type of subtag:
|
||||
// Base: Exact, MaxExact, High, Low, No
|
||||
// Script: Exact, MaxExact [see Note 3], Low, No
|
||||
// Region: Exact, MaxExact, High
|
||||
// Variant: Exact, High
|
||||
// Private: Exact, No
|
||||
//
|
||||
// Any result with a confidence level of Low or higher is deemed a possible match.
|
||||
// Once a desired tag matches any of the supported tags with a level of MaxExact
|
||||
// or higher, the next desired tag is not considered (see Step 2.b).
|
||||
// Note that CLDR provides languageMatching data that defines close equivalence
|
||||
// classes for base languages, scripts and regions.
|
||||
//
|
||||
// Tie-breaking
|
||||
// If we get the same confidence level for two matches, we apply a sequence of
|
||||
// tie-breaking rules. The first that succeeds defines the result. The rules are
|
||||
// applied in the following order.
|
||||
// 1) Original language was defined and was identical.
|
||||
// 2) Original region was defined and was identical.
|
||||
// 3) Distance between two maximized regions was the smallest.
|
||||
// 4) Original script was defined and was identical.
|
||||
// 5) Distance from want tag to have tag using the parent relation [see Note 5.]
|
||||
// If there is still no winner after these rules are applied, the first match
|
||||
// found wins.
|
||||
//
|
||||
// Notes:
|
||||
// [2] In practice, as matching of Exact is done in a separate phase from
|
||||
// matching the other levels, we reuse the Exact level to mean MaxExact in
|
||||
// the second phase. As a consequence, we only need the levels defined by
|
||||
// the Confidence type. The MaxExact confidence level is mapped to High in
|
||||
// the public API.
|
||||
// [3] We do not differentiate between maximized script values that were derived
|
||||
// from suppressScript versus most likely tag data. We determined that in
|
||||
// ranking the two, one ranks just after the other. Moreover, the two cannot
|
||||
// occur concurrently. As a consequence, they are identical for practical
|
||||
// purposes.
|
||||
// [4] In case of deprecated, macro-equivalents and legacy mappings, we assign
|
||||
// the MaxExact level to allow iw vs he to still be a closer match than
|
||||
// en-AU vs en-US, for example.
|
||||
// [5] In CLDR a locale inherits fields that are unspecified for this locale
|
||||
// from its parent. Therefore, if a locale is a parent of another locale,
|
||||
// it is a strong measure for closeness, especially when no other tie
|
||||
// breaker rule applies. One could also argue it is inconsistent, for
|
||||
// example, when pt-AO matches pt (which CLDR equates with pt-BR), even
|
||||
// though its parent is pt-PT according to the inheritance rules.
|
||||
//
|
||||
// Implementation Details:
|
||||
// There are several performance considerations worth pointing out. Most notably,
|
||||
// we preprocess as much as possible (within reason) at the time of creation of a
|
||||
// matcher. This includes:
|
||||
// - creating a per-language map, which includes data for the raw base language
|
||||
// and its canonicalized variant (if applicable),
|
||||
// - expanding entries for the equivalence classes defined in CLDR's
|
||||
// languageMatch data.
|
||||
// The per-language map ensures that typically only a very small number of tags
|
||||
// need to be considered. The pre-expansion of canonicalized subtags and
|
||||
// equivalence classes reduces the amount of map lookups that need to be done at
|
||||
// runtime.
|
||||
|
||||
// matcher keeps a set of supported language tags, indexed by language.
|
||||
type matcher struct {
|
||||
// default_ is the tag Match falls back to when getBest finds no match.
default_ *haveTag
|
||||
// supported has one entry per supported tag, in the order passed to newMatcher.
supported []*haveTag
|
||||
// index maps a language to its matchHeader; entries are created on demand by header.
index map[language.Language]*matchHeader
|
||||
// NOTE(review): passSettings is not referenced in the visible part of this file — confirm it is still needed.
passSettings bool
|
||||
// preferSameScript is set by the PreferSameScript option; newMatcher defaults it to true.
preferSameScript bool
|
||||
}
|
||||
|
||||
// matchHeader has the lists of tags for exact matches and matches based on
|
||||
// maximized and canonicalized tags for a given language.
|
||||
type matchHeader struct {
|
||||
// haveTags are the candidate tags for this language, appended by addIfNew.
haveTags []*haveTag
|
||||
// original becomes true once addIfNew has been called with exact set;
// newMatcher only expands equivalences from headers that have it set.
original bool
|
||||
}
|
||||
|
||||
// haveTag holds a supported Tag and its maximized script and region. The maximized
|
||||
// or canonicalized language is not stored as it is not needed during matching.
|
||||
type haveTag struct {
|
||||
// tag is the supported tag as passed to makeHaveTag (not maximized).
tag language.Tag
|
||||
|
||||
// index of this tag in the original list of supported tags.
|
||||
index int
|
||||
|
||||
// conf is the maximum confidence that can result from matching this haveTag.
|
||||
// When conf < Exact this means it was inserted after applying a CLDR equivalence rule.
|
||||
conf Confidence
|
||||
|
||||
// Maximized region and script.
|
||||
maxRegion language.Region
|
||||
maxScript language.Script
|
||||
|
||||
// altScript may be checked as an alternative match to maxScript. If altScript
|
||||
// matches, the confidence level for this match is Low. Theoretically there
|
||||
// could be multiple alternative scripts. This does not occur in practice.
|
||||
altScript language.Script
|
||||
|
||||
// nextMax is the index of the next haveTag with the same maximized tags.
// Zero means there is no further entry in the chain (see addIfNew).
|
||||
nextMax uint16
|
||||
}
|
||||
|
||||
func makeHaveTag(tag language.Tag, index int) (haveTag, language.Language) {
|
||||
max := tag
|
||||
if tag.LangID != 0 || tag.RegionID != 0 || tag.ScriptID != 0 {
|
||||
max, _ = canonicalize(All, max)
|
||||
max, _ = max.Maximize()
|
||||
max.RemakeString()
|
||||
}
|
||||
return haveTag{tag, index, Exact, max.RegionID, max.ScriptID, altScript(max.LangID, max.ScriptID), 0}, max.LangID
|
||||
}
|
||||
|
||||
// altScript returns an alternative script that may match the given script with
|
||||
// a low confidence. At the moment, the langMatch data allows for at most one
|
||||
// script to map to another and we rely on this to keep the code simple.
|
||||
func altScript(l language.Language, s language.Script) language.Script {
|
||||
for _, alt := range matchScript {
|
||||
// TODO: also match cases where language is not the same.
|
||||
if (language.Language(alt.wantLang) == l || language.Language(alt.haveLang) == l) &&
|
||||
language.Script(alt.haveScript) == s {
|
||||
return language.Script(alt.wantScript)
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// addIfNew adds a haveTag to the list of tags only if it is a unique tag.
|
||||
// Tags that have the same maximized values are linked by index.
|
||||
func (h *matchHeader) addIfNew(n haveTag, exact bool) {
|
||||
h.original = h.original || exact
|
||||
// Don't add new exact matches.
|
||||
for _, v := range h.haveTags {
|
||||
if equalsRest(v.tag, n.tag) {
|
||||
return
|
||||
}
|
||||
}
|
||||
// Allow duplicate maximized tags, but create a linked list to allow quickly
|
||||
// comparing the equivalents and bail out.
|
||||
for i, v := range h.haveTags {
|
||||
if v.maxScript == n.maxScript &&
|
||||
v.maxRegion == n.maxRegion &&
|
||||
v.tag.VariantOrPrivateUseTags() == n.tag.VariantOrPrivateUseTags() {
|
||||
for h.haveTags[i].nextMax != 0 {
|
||||
i = int(h.haveTags[i].nextMax)
|
||||
}
|
||||
h.haveTags[i].nextMax = uint16(len(h.haveTags))
|
||||
break
|
||||
}
|
||||
}
|
||||
h.haveTags = append(h.haveTags, &n)
|
||||
}
|
||||
|
||||
// header returns the matchHeader for the given language. It creates one if
|
||||
// it doesn't already exist.
|
||||
func (m *matcher) header(l language.Language) *matchHeader {
|
||||
if h := m.index[l]; h != nil {
|
||||
return h
|
||||
}
|
||||
h := &matchHeader{}
|
||||
m.index[l] = h
|
||||
return h
|
||||
}
|
||||
|
||||
func toConf(d uint8) Confidence {
|
||||
if d <= 10 {
|
||||
return High
|
||||
}
|
||||
if d < 30 {
|
||||
return Low
|
||||
}
|
||||
return No
|
||||
}
|
||||
|
||||
// newMatcher builds an index for the given supported tags and returns it as
|
||||
// a matcher. It also expands the index by considering various equivalence classes
|
||||
// for a given tag.
|
||||
func newMatcher(supported []Tag, options []MatchOption) *matcher {
|
||||
m := &matcher{
|
||||
index: make(map[language.Language]*matchHeader),
|
||||
preferSameScript: true,
|
||||
}
|
||||
for _, o := range options {
|
||||
o(m)
|
||||
}
|
||||
if len(supported) == 0 {
|
||||
m.default_ = &haveTag{}
|
||||
return m
|
||||
}
|
||||
// Add supported languages to the index. Add exact matches first to give
|
||||
// them precedence.
|
||||
for i, tag := range supported {
|
||||
tt := tag.tag()
|
||||
pair, _ := makeHaveTag(tt, i)
|
||||
m.header(tt.LangID).addIfNew(pair, true)
|
||||
m.supported = append(m.supported, &pair)
|
||||
}
|
||||
m.default_ = m.header(supported[0].lang()).haveTags[0]
|
||||
// Keep these in two different loops to support the case that two equivalent
|
||||
// languages are distinguished, such as iw and he.
|
||||
for i, tag := range supported {
|
||||
tt := tag.tag()
|
||||
pair, max := makeHaveTag(tt, i)
|
||||
if max != tt.LangID {
|
||||
m.header(max).addIfNew(pair, true)
|
||||
}
|
||||
}
|
||||
|
||||
// update is used to add indexes in the map for equivalent languages.
|
||||
// update will only add entries to original indexes, thus not computing any
|
||||
// transitive relations.
|
||||
update := func(want, have uint16, conf Confidence) {
|
||||
if hh := m.index[language.Language(have)]; hh != nil {
|
||||
if !hh.original {
|
||||
return
|
||||
}
|
||||
hw := m.header(language.Language(want))
|
||||
for _, ht := range hh.haveTags {
|
||||
v := *ht
|
||||
if conf < v.conf {
|
||||
v.conf = conf
|
||||
}
|
||||
v.nextMax = 0 // this value needs to be recomputed
|
||||
if v.altScript != 0 {
|
||||
v.altScript = altScript(language.Language(want), v.maxScript)
|
||||
}
|
||||
hw.addIfNew(v, conf == Exact && hh.original)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add entries for languages with mutual intelligibility as defined by CLDR's
|
||||
// languageMatch data.
|
||||
for _, ml := range matchLang {
|
||||
update(ml.want, ml.have, toConf(ml.distance))
|
||||
if !ml.oneway {
|
||||
update(ml.have, ml.want, toConf(ml.distance))
|
||||
}
|
||||
}
|
||||
|
||||
// Add entries for possible canonicalizations. This is an optimization to
|
||||
// ensure that only one map lookup needs to be done at runtime per desired tag.
|
||||
// First we match deprecated equivalents. If they are perfect equivalents
|
||||
// (their canonicalization simply substitutes a different language code, but
|
||||
// nothing else), the match confidence is Exact, otherwise it is High.
|
||||
for i, lm := range language.AliasMap {
|
||||
// If deprecated codes match and there is no fiddling with the script or
|
||||
// or region, we consider it an exact match.
|
||||
conf := Exact
|
||||
if language.AliasTypes[i] != language.Macro {
|
||||
if !isExactEquivalent(language.Language(lm.From)) {
|
||||
conf = High
|
||||
}
|
||||
update(lm.To, lm.From, conf)
|
||||
}
|
||||
update(lm.From, lm.To, conf)
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// getBest gets the best matching tag in m for any of the given tags, taking into
|
||||
// account the order of preference of the given tags.
|
||||
func (m *matcher) getBest(want ...Tag) (got *haveTag, orig language.Tag, c Confidence) {
|
||||
best := bestMatch{}
|
||||
for i, ww := range want {
|
||||
w := ww.tag()
|
||||
var max language.Tag
|
||||
// Check for exact match first.
|
||||
h := m.index[w.LangID]
|
||||
if w.LangID != 0 {
|
||||
if h == nil {
|
||||
continue
|
||||
}
|
||||
// Base language is defined.
|
||||
max, _ = canonicalize(Legacy|Deprecated|Macro, w)
|
||||
// A region that is added through canonicalization is stronger than
|
||||
// a maximized region: set it in the original (e.g. mo -> ro-MD).
|
||||
if w.RegionID != max.RegionID {
|
||||
w.RegionID = max.RegionID
|
||||
}
|
||||
// TODO: should we do the same for scripts?
|
||||
// See test case: en, sr, nl ; sh ; sr
|
||||
max, _ = max.Maximize()
|
||||
} else {
|
||||
// Base language is not defined.
|
||||
if h != nil {
|
||||
for i := range h.haveTags {
|
||||
have := h.haveTags[i]
|
||||
if equalsRest(have.tag, w) {
|
||||
return have, w, Exact
|
||||
}
|
||||
}
|
||||
}
|
||||
if w.ScriptID == 0 && w.RegionID == 0 {
|
||||
// We skip all tags matching und for approximate matching, including
|
||||
// private tags.
|
||||
continue
|
||||
}
|
||||
max, _ = w.Maximize()
|
||||
if h = m.index[max.LangID]; h == nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
pin := true
|
||||
for _, t := range want[i+1:] {
|
||||
if w.LangID == t.lang() {
|
||||
pin = false
|
||||
break
|
||||
}
|
||||
}
|
||||
// Check for match based on maximized tag.
|
||||
for i := range h.haveTags {
|
||||
have := h.haveTags[i]
|
||||
best.update(have, w, max.ScriptID, max.RegionID, pin)
|
||||
if best.conf == Exact {
|
||||
for have.nextMax != 0 {
|
||||
have = h.haveTags[have.nextMax]
|
||||
best.update(have, w, max.ScriptID, max.RegionID, pin)
|
||||
}
|
||||
return best.have, best.want, best.conf
|
||||
}
|
||||
}
|
||||
}
|
||||
if best.conf <= No {
|
||||
if len(want) != 0 {
|
||||
return nil, want[0].tag(), No
|
||||
}
|
||||
return nil, language.Tag{}, No
|
||||
}
|
||||
return best.have, best.want, best.conf
|
||||
}
|
||||
|
||||
// bestMatch accumulates the best match so far.
|
||||
type bestMatch struct {
|
||||
have *haveTag
|
||||
want language.Tag
|
||||
conf Confidence
|
||||
pinnedRegion language.Region
|
||||
pinLanguage bool
|
||||
sameRegionGroup bool
|
||||
// Cached results from applying tie-breaking rules.
|
||||
origLang bool
|
||||
origReg bool
|
||||
paradigmReg bool
|
||||
regGroupDist uint8
|
||||
origScript bool
|
||||
}
|
||||
|
||||
// update updates the existing best match if the new pair is considered to be a
|
||||
// better match. To determine if the given pair is a better match, it first
|
||||
// computes the rough confidence level. If this surpasses the current match, it
|
||||
// will replace it and update the tie-breaker rule cache. If there is a tie, it
|
||||
// proceeds with applying a series of tie-breaker rules. If there is no
|
||||
// conclusive winner after applying the tie-breaker rules, it leaves the current
|
||||
// match as the preferred match.
|
||||
//
|
||||
// If pin is true and have and tag are a strong match, it will henceforth only
|
||||
// consider matches for this language. This corresponds to the nothing that most
|
||||
// users have a strong preference for the first defined language. A user can
|
||||
// still prefer a second language over a dialect of the preferred language by
|
||||
// explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should
|
||||
// be false.
|
||||
func (m *bestMatch) update(have *haveTag, tag language.Tag, maxScript language.Script, maxRegion language.Region, pin bool) {
|
||||
// Bail if the maximum attainable confidence is below that of the current best match.
|
||||
c := have.conf
|
||||
if c < m.conf {
|
||||
return
|
||||
}
|
||||
// Don't change the language once we already have found an exact match.
|
||||
if m.pinLanguage && tag.LangID != m.want.LangID {
|
||||
return
|
||||
}
|
||||
// Pin the region group if we are comparing tags for the same language.
|
||||
if tag.LangID == m.want.LangID && m.sameRegionGroup {
|
||||
_, sameGroup := regionGroupDist(m.pinnedRegion, have.maxRegion, have.maxScript, m.want.LangID)
|
||||
if !sameGroup {
|
||||
return
|
||||
}
|
||||
}
|
||||
if c == Exact && have.maxScript == maxScript {
|
||||
// If there is another language and then another entry of this language,
|
||||
// don't pin anything, otherwise pin the language.
|
||||
m.pinLanguage = pin
|
||||
}
|
||||
if equalsRest(have.tag, tag) {
|
||||
} else if have.maxScript != maxScript {
|
||||
// There is usually very little comprehension between different scripts.
|
||||
// In a few cases there may still be Low comprehension. This possibility
|
||||
// is pre-computed and stored in have.altScript.
|
||||
if Low < m.conf || have.altScript != maxScript {
|
||||
return
|
||||
}
|
||||
c = Low
|
||||
} else if have.maxRegion != maxRegion {
|
||||
if High < c {
|
||||
// There is usually a small difference between languages across regions.
|
||||
c = High
|
||||
}
|
||||
}
|
||||
|
||||
// We store the results of the computations of the tie-breaker rules along
|
||||
// with the best match. There is no need to do the checks once we determine
|
||||
// we have a winner, but we do still need to do the tie-breaker computations.
|
||||
// We use "beaten" to keep track if we still need to do the checks.
|
||||
beaten := false // true if the new pair defeats the current one.
|
||||
if c != m.conf {
|
||||
if c < m.conf {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// Tie-breaker rules:
|
||||
// We prefer if the pre-maximized language was specified and identical.
|
||||
origLang := have.tag.LangID == tag.LangID && tag.LangID != 0
|
||||
if !beaten && m.origLang != origLang {
|
||||
if m.origLang {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// We prefer if the pre-maximized region was specified and identical.
|
||||
origReg := have.tag.RegionID == tag.RegionID && tag.RegionID != 0
|
||||
if !beaten && m.origReg != origReg {
|
||||
if m.origReg {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
regGroupDist, sameGroup := regionGroupDist(have.maxRegion, maxRegion, maxScript, tag.LangID)
|
||||
if !beaten && m.regGroupDist != regGroupDist {
|
||||
if regGroupDist > m.regGroupDist {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
paradigmReg := isParadigmLocale(tag.LangID, have.maxRegion)
|
||||
if !beaten && m.paradigmReg != paradigmReg {
|
||||
if !paradigmReg {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// Next we prefer if the pre-maximized script was specified and identical.
|
||||
origScript := have.tag.ScriptID == tag.ScriptID && tag.ScriptID != 0
|
||||
if !beaten && m.origScript != origScript {
|
||||
if m.origScript {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// Update m to the newly found best match.
|
||||
if beaten {
|
||||
m.have = have
|
||||
m.want = tag
|
||||
m.conf = c
|
||||
m.pinnedRegion = maxRegion
|
||||
m.sameRegionGroup = sameGroup
|
||||
m.origLang = origLang
|
||||
m.origReg = origReg
|
||||
m.paradigmReg = paradigmReg
|
||||
m.origScript = origScript
|
||||
m.regGroupDist = regGroupDist
|
||||
}
|
||||
}
|
||||
|
||||
func isParadigmLocale(lang language.Language, r language.Region) bool {
|
||||
for _, e := range paradigmLocales {
|
||||
if language.Language(e[0]) == lang && (r == language.Region(e[1]) || r == language.Region(e[2])) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// regionGroupDist computes the distance between two regions based on their
|
||||
// CLDR grouping.
|
||||
func regionGroupDist(a, b language.Region, script language.Script, lang language.Language) (dist uint8, same bool) {
|
||||
const defaultDistance = 4
|
||||
|
||||
aGroup := uint(regionToGroups[a]) << 1
|
||||
bGroup := uint(regionToGroups[b]) << 1
|
||||
for _, ri := range matchRegion {
|
||||
if language.Language(ri.lang) == lang && (ri.script == 0 || language.Script(ri.script) == script) {
|
||||
group := uint(1 << (ri.group &^ 0x80))
|
||||
if 0x80&ri.group == 0 {
|
||||
if aGroup&bGroup&group != 0 { // Both regions are in the group.
|
||||
return ri.distance, ri.distance == defaultDistance
|
||||
}
|
||||
} else {
|
||||
if (aGroup|bGroup)&group == 0 { // Both regions are not in the group.
|
||||
return ri.distance, ri.distance == defaultDistance
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return defaultDistance, true
|
||||
}
|
||||
|
||||
// equalsRest compares everything except the language.
|
||||
func equalsRest(a, b language.Tag) bool {
|
||||
// TODO: don't include extensions in this comparison. To do this efficiently,
|
||||
// though, we should handle private tags separately.
|
||||
return a.ScriptID == b.ScriptID && a.RegionID == b.RegionID && a.VariantOrPrivateUseTags() == b.VariantOrPrivateUseTags()
|
||||
}
|
||||
|
||||
// isExactEquivalent returns true if canonicalizing the language will not alter
|
||||
// the script or region of a tag.
|
||||
func isExactEquivalent(l language.Language) bool {
|
||||
for _, o := range notEquivalent {
|
||||
if o == l {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
var notEquivalent []language.Language
|
||||
|
||||
func init() {
|
||||
// Create a list of all languages for which canonicalization may alter the
|
||||
// script or region.
|
||||
for _, lm := range language.AliasMap {
|
||||
tag := language.Tag{LangID: language.Language(lm.From)}
|
||||
if tag, _ = canonicalize(All, tag); tag.ScriptID != 0 || tag.RegionID != 0 {
|
||||
notEquivalent = append(notEquivalent, language.Language(lm.From))
|
||||
}
|
||||
}
|
||||
// Maximize undefined regions of paradigm locales.
|
||||
for i, v := range paradigmLocales {
|
||||
t := language.Tag{LangID: language.Language(v[0])}
|
||||
max, _ := t.Maximize()
|
||||
if v[1] == 0 {
|
||||
paradigmLocales[i][1] = uint16(max.RegionID)
|
||||
}
|
||||
if v[2] == 0 {
|
||||
paradigmLocales[i][2] = uint16(max.RegionID)
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,228 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// ValueError is the error returned by the parsing functions for input that
// is well-formed but contains a subtag that is not recognized as a valid
// value.
type ValueError interface {
	error

	// Subtag returns the subtag for which the error occurred.
	Subtag() string
}
|
||||
|
||||
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
||||
// failed it returns an error and any part of the tag that could be parsed.
|
||||
// If parsing succeeded but an unknown value was found, it returns
|
||||
// ValueError. The Tag returned in this case is just stripped of the unknown
|
||||
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
||||
// and extensions to this standard defined in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// The resulting tag is canonicalized using the default canonicalization type.
|
||||
func Parse(s string) (t Tag, err error) {
|
||||
return Default.Parse(s)
|
||||
}
|
||||
|
||||
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
||||
// failed it returns an error and any part of the tag that could be parsed.
|
||||
// If parsing succeeded but an unknown value was found, it returns
|
||||
// ValueError. The Tag returned in this case is just stripped of the unknown
|
||||
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
||||
// and extensions to this standard defined in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// The resulting tag is canonicalized using the the canonicalization type c.
|
||||
func (c CanonType) Parse(s string) (t Tag, err error) {
|
||||
tt, err := language.Parse(s)
|
||||
if err != nil {
|
||||
return makeTag(tt), err
|
||||
}
|
||||
tt, changed := canonicalize(c, tt)
|
||||
if changed {
|
||||
tt.RemakeString()
|
||||
}
|
||||
return makeTag(tt), err
|
||||
}
|
||||
|
||||
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
|
||||
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
|
||||
// Base, Script or Region or slice of type Variant or Extension is passed more
|
||||
// than once, the latter will overwrite the former. Variants and Extensions are
|
||||
// accumulated, but if two extensions of the same type are passed, the latter
|
||||
// will replace the former. For -u extensions, though, the key-type pairs are
|
||||
// added, where later values overwrite older ones. A Tag overwrites all former
|
||||
// values and typically only makes sense as the first argument. The resulting
|
||||
// tag is returned after canonicalizing using the Default CanonType. If one or
|
||||
// more errors are encountered, one of the errors is returned.
|
||||
func Compose(part ...interface{}) (t Tag, err error) {
|
||||
return Default.Compose(part...)
|
||||
}
|
||||
|
||||
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
|
||||
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
|
||||
// Base, Script or Region or slice of type Variant or Extension is passed more
|
||||
// than once, the latter will overwrite the former. Variants and Extensions are
|
||||
// accumulated, but if two extensions of the same type are passed, the latter
|
||||
// will replace the former. For -u extensions, though, the key-type pairs are
|
||||
// added, where later values overwrite older ones. A Tag overwrites all former
|
||||
// values and typically only makes sense as the first argument. The resulting
|
||||
// tag is returned after canonicalizing using CanonType c. If one or more errors
|
||||
// are encountered, one of the errors is returned.
|
||||
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
|
||||
var b language.Builder
|
||||
if err = update(&b, part...); err != nil {
|
||||
return und, err
|
||||
}
|
||||
b.Tag, _ = canonicalize(c, b.Tag)
|
||||
return makeTag(b.Make()), err
|
||||
}
|
||||
|
||||
var errInvalidArgument = errors.New("invalid Extension or Variant")
|
||||
|
||||
func update(b *language.Builder, part ...interface{}) (err error) {
|
||||
for _, x := range part {
|
||||
switch v := x.(type) {
|
||||
case Tag:
|
||||
b.SetTag(v.tag())
|
||||
case Base:
|
||||
b.Tag.LangID = v.langID
|
||||
case Script:
|
||||
b.Tag.ScriptID = v.scriptID
|
||||
case Region:
|
||||
b.Tag.RegionID = v.regionID
|
||||
case Variant:
|
||||
if v.variant == "" {
|
||||
err = errInvalidArgument
|
||||
break
|
||||
}
|
||||
b.AddVariant(v.variant)
|
||||
case Extension:
|
||||
if v.s == "" {
|
||||
err = errInvalidArgument
|
||||
break
|
||||
}
|
||||
b.SetExt(v.s)
|
||||
case []Variant:
|
||||
b.ClearVariants()
|
||||
for _, v := range v {
|
||||
b.AddVariant(v.variant)
|
||||
}
|
||||
case []Extension:
|
||||
b.ClearExtensions()
|
||||
for _, e := range v {
|
||||
b.SetExt(e.s)
|
||||
}
|
||||
// TODO: support parsing of raw strings based on morphology or just extensions?
|
||||
case error:
|
||||
if v != nil {
|
||||
err = v
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
|
||||
|
||||
// ParseAcceptLanguage parses the contents of an Accept-Language header as
|
||||
// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
|
||||
// a list of corresponding quality weights. It is more permissive than RFC 2616
|
||||
// and may return non-nil slices even if the input is not valid.
|
||||
// The Tags will be sorted by highest weight first and then by first occurrence.
|
||||
// Tags with a weight of zero will be dropped. An error will be returned if the
|
||||
// input could not be parsed.
|
||||
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
|
||||
var entry string
|
||||
for s != "" {
|
||||
if entry, s = split(s, ','); entry == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
entry, weight := split(entry, ';')
|
||||
|
||||
// Scan the language.
|
||||
t, err := Parse(entry)
|
||||
if err != nil {
|
||||
id, ok := acceptFallback[entry]
|
||||
if !ok {
|
||||
return nil, nil, err
|
||||
}
|
||||
t = makeTag(language.Tag{LangID: id})
|
||||
}
|
||||
|
||||
// Scan the optional weight.
|
||||
w := 1.0
|
||||
if weight != "" {
|
||||
weight = consume(weight, 'q')
|
||||
weight = consume(weight, '=')
|
||||
// consume returns the empty string when a token could not be
|
||||
// consumed, resulting in an error for ParseFloat.
|
||||
if w, err = strconv.ParseFloat(weight, 32); err != nil {
|
||||
return nil, nil, errInvalidWeight
|
||||
}
|
||||
// Drop tags with a quality weight of 0.
|
||||
if w <= 0 {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
tag = append(tag, t)
|
||||
q = append(q, float32(w))
|
||||
}
|
||||
sortStable(&tagSort{tag, q})
|
||||
return tag, q, nil
|
||||
}
|
||||
|
||||
// consume strips the leading byte c from s and returns the remainder with
// surrounding white space trimmed. If s does not start with c, including when
// s is empty, it returns the empty string.
func consume(s string, c byte) string {
	if len(s) > 0 && s[0] == c {
		return strings.TrimSpace(s[1:])
	}
	return ""
}
|
||||
|
||||
// split cuts s at the first occurrence of the byte c and returns the parts
// before and after it, each with surrounding white space trimmed. If c does
// not occur in s, head is the trimmed s and tail is empty.
func split(s string, c byte) (head, tail string) {
	i := strings.IndexByte(s, c)
	if i < 0 {
		return strings.TrimSpace(s), ""
	}
	head = strings.TrimSpace(s[:i])
	tail = strings.TrimSpace(s[i+1:])
	return head, tail
}
|
||||
|
||||
// Add hack mapping to deal with a small number of cases that that occur
|
||||
// in Accept-Language (with reasonable frequency).
|
||||
var acceptFallback = map[string]language.Language{
|
||||
"english": _en,
|
||||
"deutsch": _de,
|
||||
"italian": _it,
|
||||
"french": _fr,
|
||||
"*": _mul, // defined in the spec to match all languages.
|
||||
}
|
||||
|
||||
type tagSort struct {
|
||||
tag []Tag
|
||||
q []float32
|
||||
}
|
||||
|
||||
func (s *tagSort) Len() int {
|
||||
return len(s.q)
|
||||
}
|
||||
|
||||
func (s *tagSort) Less(i, j int) bool {
|
||||
return s.q[i] > s.q[j]
|
||||
}
|
||||
|
||||
func (s *tagSort) Swap(i, j int) {
|
||||
s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
|
||||
s.q[i], s.q[j] = s.q[j], s.q[i]
|
||||
}
|
@ -0,0 +1,298 @@
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package language
|
||||
|
||||
// CLDRVersion is the CLDR version from which the tables in this package are derived.
|
||||
const CLDRVersion = "32"
|
||||
|
||||
const (
|
||||
_de = 269
|
||||
_en = 313
|
||||
_fr = 350
|
||||
_it = 505
|
||||
_mo = 784
|
||||
_no = 879
|
||||
_nb = 839
|
||||
_pt = 960
|
||||
_sh = 1031
|
||||
_mul = 806
|
||||
_und = 0
|
||||
)
|
||||
const (
|
||||
_001 = 1
|
||||
_419 = 31
|
||||
_BR = 65
|
||||
_CA = 73
|
||||
_ES = 110
|
||||
_GB = 123
|
||||
_MD = 188
|
||||
_PT = 238
|
||||
_UK = 306
|
||||
_US = 309
|
||||
_ZZ = 357
|
||||
_XA = 323
|
||||
_XC = 325
|
||||
_XK = 333
|
||||
)
|
||||
const (
|
||||
_Latn = 87
|
||||
_Hani = 54
|
||||
_Hans = 56
|
||||
_Hant = 57
|
||||
_Qaaa = 139
|
||||
_Qaai = 147
|
||||
_Qabx = 188
|
||||
_Zinh = 236
|
||||
_Zyyy = 241
|
||||
_Zzzz = 242
|
||||
)
|
||||
|
||||
var regionToGroups = []uint8{ // 357 elements
|
||||
// Entry 0 - 3F
|
||||
0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x04,
|
||||
// Entry 40 - 7F
|
||||
0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
|
||||
0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x08,
|
||||
0x00, 0x04, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
|
||||
// Entry 80 - BF
|
||||
0x00, 0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00,
|
||||
0x00, 0x04, 0x01, 0x00, 0x04, 0x02, 0x00, 0x04,
|
||||
0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x08, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00,
|
||||
// Entry C0 - FF
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01,
|
||||
0x04, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 100 - 13F
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
|
||||
0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x01, 0x00, 0x05, 0x04, 0x00,
|
||||
0x00, 0x04, 0x00, 0x04, 0x04, 0x05, 0x00, 0x00,
|
||||
// Entry 140 - 17F
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
} // Size: 381 bytes
|
||||
|
||||
var paradigmLocales = [][3]uint16{ // 3 elements
|
||||
0: [3]uint16{0x139, 0x0, 0x7b},
|
||||
1: [3]uint16{0x13e, 0x0, 0x1f},
|
||||
2: [3]uint16{0x3c0, 0x41, 0xee},
|
||||
} // Size: 42 bytes
|
||||
|
||||
// mutualIntelligibility is an entry of matchLang: a pair of base languages
// that are typically mutually intelligible.
type mutualIntelligibility struct {
	want     uint16 // langID of the desired language
	have     uint16 // langID of the supported language
	distance uint8  // distance between the two; converted to a confidence by the matcher
	oneway   bool   // if false, the relation also holds with want and have swapped
}
|
||||
// scriptIntelligibility is an entry of matchScript: a pair of scripts where
// readers of one can typically also read the other.
type scriptIntelligibility struct {
	wantLang   uint16 // presumably the langID of the desired language; confirm against the generator
	haveLang   uint16 // presumably the langID of the supported language; confirm against the generator
	wantScript uint8  // scriptID of the desired script
	haveScript uint8  // scriptID of the supported script
	distance   uint8  // distance associated with the pair
}
|
||||
// regionIntelligibility is a region-distance rule as consumed by
// regionGroupDist.
type regionIntelligibility struct {
	lang     uint16 // langID the rule applies to
	script   uint8  // scriptID the rule applies to; 0 matches any script
	group    uint8  // low 7 bits select the region group; the 0x80 bit inverts the membership test
	distance uint8  // distance returned when the rule applies
}
|
||||
|
||||
// matchLang holds pairs of langIDs of base languages that are typically
|
||||
// mutually intelligible. Each pair is associated with a confidence and
|
||||
// whether the intelligibility goes one or both ways.
|
||||
var matchLang = []mutualIntelligibility{ // 113 elements
|
||||
0: {want: 0x1d1, have: 0xb7, distance: 0x4, oneway: false},
|
||||
1: {want: 0x407, have: 0xb7, distance: 0x4, oneway: false},
|
||||
2: {want: 0x407, have: 0x1d1, distance: 0x4, oneway: false},
|
||||
3: {want: 0x407, have: 0x432, distance: 0x4, oneway: false},
|
||||
4: {want: 0x43a, have: 0x1, distance: 0x4, oneway: false},
|
||||
5: {want: 0x1a3, have: 0x10d, distance: 0x4, oneway: true},
|
||||
6: {want: 0x295, have: 0x10d, distance: 0x4, oneway: true},
|
||||
7: {want: 0x101, have: 0x36f, distance: 0x8, oneway: false},
|
||||
8: {want: 0x101, have: 0x347, distance: 0x8, oneway: false},
|
||||
9: {want: 0x5, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
10: {want: 0xd, have: 0x139, distance: 0xa, oneway: true},
|
||||
11: {want: 0x16, have: 0x367, distance: 0xa, oneway: true},
|
||||
12: {want: 0x21, have: 0x139, distance: 0xa, oneway: true},
|
||||
13: {want: 0x56, have: 0x13e, distance: 0xa, oneway: true},
|
||||
14: {want: 0x58, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
15: {want: 0x71, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
16: {want: 0x75, have: 0x139, distance: 0xa, oneway: true},
|
||||
17: {want: 0x82, have: 0x1be, distance: 0xa, oneway: true},
|
||||
18: {want: 0xa5, have: 0x139, distance: 0xa, oneway: true},
|
||||
19: {want: 0xb2, have: 0x15e, distance: 0xa, oneway: true},
|
||||
20: {want: 0xdd, have: 0x153, distance: 0xa, oneway: true},
|
||||
21: {want: 0xe5, have: 0x139, distance: 0xa, oneway: true},
|
||||
22: {want: 0xe9, have: 0x3a, distance: 0xa, oneway: true},
|
||||
23: {want: 0xf0, have: 0x15e, distance: 0xa, oneway: true},
|
||||
24: {want: 0xf9, have: 0x15e, distance: 0xa, oneway: true},
|
||||
25: {want: 0x100, have: 0x139, distance: 0xa, oneway: true},
|
||||
26: {want: 0x130, have: 0x139, distance: 0xa, oneway: true},
|
||||
27: {want: 0x13c, have: 0x139, distance: 0xa, oneway: true},
|
||||
28: {want: 0x140, have: 0x151, distance: 0xa, oneway: true},
|
||||
29: {want: 0x145, have: 0x13e, distance: 0xa, oneway: true},
|
||||
30: {want: 0x158, have: 0x101, distance: 0xa, oneway: true},
|
||||
31: {want: 0x16d, have: 0x367, distance: 0xa, oneway: true},
|
||||
32: {want: 0x16e, have: 0x139, distance: 0xa, oneway: true},
|
||||
33: {want: 0x16f, have: 0x139, distance: 0xa, oneway: true},
|
||||
34: {want: 0x17e, have: 0x139, distance: 0xa, oneway: true},
|
||||
35: {want: 0x190, have: 0x13e, distance: 0xa, oneway: true},
|
||||
36: {want: 0x194, have: 0x13e, distance: 0xa, oneway: true},
|
||||
37: {want: 0x1a4, have: 0x1be, distance: 0xa, oneway: true},
|
||||
38: {want: 0x1b4, have: 0x139, distance: 0xa, oneway: true},
|
||||
39: {want: 0x1b8, have: 0x139, distance: 0xa, oneway: true},
|
||||
40: {want: 0x1d4, have: 0x15e, distance: 0xa, oneway: true},
|
||||
41: {want: 0x1d7, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
42: {want: 0x1d9, have: 0x139, distance: 0xa, oneway: true},
|
||||
43: {want: 0x1e7, have: 0x139, distance: 0xa, oneway: true},
|
||||
44: {want: 0x1f8, have: 0x139, distance: 0xa, oneway: true},
|
||||
45: {want: 0x20e, have: 0x1e1, distance: 0xa, oneway: true},
|
||||
46: {want: 0x210, have: 0x139, distance: 0xa, oneway: true},
|
||||
47: {want: 0x22d, have: 0x15e, distance: 0xa, oneway: true},
|
||||
48: {want: 0x242, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
49: {want: 0x24a, have: 0x139, distance: 0xa, oneway: true},
|
||||
50: {want: 0x251, have: 0x139, distance: 0xa, oneway: true},
|
||||
51: {want: 0x265, have: 0x139, distance: 0xa, oneway: true},
|
||||
52: {want: 0x274, have: 0x48a, distance: 0xa, oneway: true},
|
||||
53: {want: 0x28a, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
54: {want: 0x28e, have: 0x1f9, distance: 0xa, oneway: true},
|
||||
55: {want: 0x2a3, have: 0x139, distance: 0xa, oneway: true},
|
||||
56: {want: 0x2b5, have: 0x15e, distance: 0xa, oneway: true},
|
||||
57: {want: 0x2b8, have: 0x139, distance: 0xa, oneway: true},
|
||||
58: {want: 0x2be, have: 0x139, distance: 0xa, oneway: true},
|
||||
59: {want: 0x2c3, have: 0x15e, distance: 0xa, oneway: true},
|
||||
60: {want: 0x2ed, have: 0x139, distance: 0xa, oneway: true},
|
||||
61: {want: 0x2f1, have: 0x15e, distance: 0xa, oneway: true},
|
||||
62: {want: 0x2fa, have: 0x139, distance: 0xa, oneway: true},
|
||||
63: {want: 0x2ff, have: 0x7e, distance: 0xa, oneway: true},
|
||||
64: {want: 0x304, have: 0x139, distance: 0xa, oneway: true},
|
||||
65: {want: 0x30b, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
66: {want: 0x31b, have: 0x1be, distance: 0xa, oneway: true},
|
||||
67: {want: 0x31f, have: 0x1e1, distance: 0xa, oneway: true},
|
||||
68: {want: 0x320, have: 0x139, distance: 0xa, oneway: true},
|
||||
69: {want: 0x331, have: 0x139, distance: 0xa, oneway: true},
|
||||
70: {want: 0x351, have: 0x139, distance: 0xa, oneway: true},
|
||||
71: {want: 0x36a, have: 0x347, distance: 0xa, oneway: false},
|
||||
72: {want: 0x36a, have: 0x36f, distance: 0xa, oneway: true},
|
||||
73: {want: 0x37a, have: 0x139, distance: 0xa, oneway: true},
|
||||
74: {want: 0x387, have: 0x139, distance: 0xa, oneway: true},
|
||||
75: {want: 0x389, have: 0x139, distance: 0xa, oneway: true},
|
||||
76: {want: 0x38b, have: 0x15e, distance: 0xa, oneway: true},
|
||||
77: {want: 0x390, have: 0x139, distance: 0xa, oneway: true},
|
||||
78: {want: 0x395, have: 0x139, distance: 0xa, oneway: true},
|
||||
79: {want: 0x39d, have: 0x139, distance: 0xa, oneway: true},
|
||||
80: {want: 0x3a5, have: 0x139, distance: 0xa, oneway: true},
|
||||
81: {want: 0x3be, have: 0x139, distance: 0xa, oneway: true},
|
||||
82: {want: 0x3c4, have: 0x13e, distance: 0xa, oneway: true},
|
||||
83: {want: 0x3d4, have: 0x10d, distance: 0xa, oneway: true},
|
||||
84: {want: 0x3d9, have: 0x139, distance: 0xa, oneway: true},
|
||||
85: {want: 0x3e5, have: 0x15e, distance: 0xa, oneway: true},
|
||||
86: {want: 0x3e9, have: 0x1be, distance: 0xa, oneway: true},
|
||||
87: {want: 0x3fa, have: 0x139, distance: 0xa, oneway: true},
|
||||
88: {want: 0x40c, have: 0x139, distance: 0xa, oneway: true},
|
||||
89: {want: 0x423, have: 0x139, distance: 0xa, oneway: true},
|
||||
90: {want: 0x429, have: 0x139, distance: 0xa, oneway: true},
|
||||
91: {want: 0x431, have: 0x139, distance: 0xa, oneway: true},
|
||||
92: {want: 0x43b, have: 0x139, distance: 0xa, oneway: true},
|
||||
93: {want: 0x43e, have: 0x1e1, distance: 0xa, oneway: true},
|
||||
94: {want: 0x445, have: 0x139, distance: 0xa, oneway: true},
|
||||
95: {want: 0x450, have: 0x139, distance: 0xa, oneway: true},
|
||||
96: {want: 0x461, have: 0x139, distance: 0xa, oneway: true},
|
||||
97: {want: 0x467, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
98: {want: 0x46f, have: 0x139, distance: 0xa, oneway: true},
|
||||
99: {want: 0x476, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
100: {want: 0x3883, have: 0x139, distance: 0xa, oneway: true},
|
||||
101: {want: 0x480, have: 0x139, distance: 0xa, oneway: true},
|
||||
102: {want: 0x482, have: 0x139, distance: 0xa, oneway: true},
|
||||
103: {want: 0x494, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
104: {want: 0x49d, have: 0x139, distance: 0xa, oneway: true},
|
||||
105: {want: 0x4ac, have: 0x529, distance: 0xa, oneway: true},
|
||||
106: {want: 0x4b4, have: 0x139, distance: 0xa, oneway: true},
|
||||
107: {want: 0x4bc, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
108: {want: 0x4e5, have: 0x15e, distance: 0xa, oneway: true},
|
||||
109: {want: 0x4f2, have: 0x139, distance: 0xa, oneway: true},
|
||||
110: {want: 0x512, have: 0x139, distance: 0xa, oneway: true},
|
||||
111: {want: 0x518, have: 0x139, distance: 0xa, oneway: true},
|
||||
112: {want: 0x52f, have: 0x139, distance: 0xa, oneway: true},
|
||||
} // Size: 702 bytes
|
||||
|
||||
// matchScript holds pairs of scriptIDs where readers of one script
|
||||
// can typically also read the other. Each is associated with a confidence.
|
||||
var matchScript = []scriptIntelligibility{ // 26 elements
|
||||
0: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x57, haveScript: 0x1f, distance: 0x5},
|
||||
1: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x1f, haveScript: 0x57, distance: 0x5},
|
||||
2: {wantLang: 0x58, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
|
||||
3: {wantLang: 0xa5, haveLang: 0x139, wantScript: 0xe, haveScript: 0x57, distance: 0xa},
|
||||
4: {wantLang: 0x1d7, haveLang: 0x3e2, wantScript: 0x8, haveScript: 0x1f, distance: 0xa},
|
||||
5: {wantLang: 0x210, haveLang: 0x139, wantScript: 0x2b, haveScript: 0x57, distance: 0xa},
|
||||
6: {wantLang: 0x24a, haveLang: 0x139, wantScript: 0x4b, haveScript: 0x57, distance: 0xa},
|
||||
7: {wantLang: 0x251, haveLang: 0x139, wantScript: 0x4f, haveScript: 0x57, distance: 0xa},
|
||||
8: {wantLang: 0x2b8, haveLang: 0x139, wantScript: 0x54, haveScript: 0x57, distance: 0xa},
|
||||
9: {wantLang: 0x304, haveLang: 0x139, wantScript: 0x6b, haveScript: 0x57, distance: 0xa},
|
||||
10: {wantLang: 0x331, haveLang: 0x139, wantScript: 0x72, haveScript: 0x57, distance: 0xa},
|
||||
11: {wantLang: 0x351, haveLang: 0x139, wantScript: 0x21, haveScript: 0x57, distance: 0xa},
|
||||
12: {wantLang: 0x395, haveLang: 0x139, wantScript: 0x7d, haveScript: 0x57, distance: 0xa},
|
||||
13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x33, haveScript: 0x57, distance: 0xa},
|
||||
14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
|
||||
15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
|
||||
16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xca, haveScript: 0x57, distance: 0xa},
|
||||
17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xd7, haveScript: 0x57, distance: 0xa},
|
||||
18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xda, haveScript: 0x57, distance: 0xa},
|
||||
19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x29, haveScript: 0x57, distance: 0xa},
|
||||
20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
|
||||
21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
|
||||
22: {wantLang: 0x4bc, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
|
||||
23: {wantLang: 0x512, haveLang: 0x139, wantScript: 0x3b, haveScript: 0x57, distance: 0xa},
|
||||
24: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x38, haveScript: 0x39, distance: 0xf},
|
||||
25: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x39, haveScript: 0x38, distance: 0x13},
|
||||
} // Size: 232 bytes
|
||||
|
||||
var matchRegion = []regionIntelligibility{ // 15 elements
|
||||
0: {lang: 0x3a, script: 0x0, group: 0x4, distance: 0x4},
|
||||
1: {lang: 0x3a, script: 0x0, group: 0x84, distance: 0x4},
|
||||
2: {lang: 0x139, script: 0x0, group: 0x1, distance: 0x4},
|
||||
3: {lang: 0x139, script: 0x0, group: 0x81, distance: 0x4},
|
||||
4: {lang: 0x13e, script: 0x0, group: 0x3, distance: 0x4},
|
||||
5: {lang: 0x13e, script: 0x0, group: 0x83, distance: 0x4},
|
||||
6: {lang: 0x3c0, script: 0x0, group: 0x3, distance: 0x4},
|
||||
7: {lang: 0x3c0, script: 0x0, group: 0x83, distance: 0x4},
|
||||
8: {lang: 0x529, script: 0x39, group: 0x2, distance: 0x4},
|
||||
9: {lang: 0x529, script: 0x39, group: 0x82, distance: 0x4},
|
||||
10: {lang: 0x3a, script: 0x0, group: 0x80, distance: 0x5},
|
||||
11: {lang: 0x139, script: 0x0, group: 0x80, distance: 0x5},
|
||||
12: {lang: 0x13e, script: 0x0, group: 0x80, distance: 0x5},
|
||||
13: {lang: 0x3c0, script: 0x0, group: 0x80, distance: 0x5},
|
||||
14: {lang: 0x529, script: 0x39, group: 0x80, distance: 0x5},
|
||||
} // Size: 114 bytes
|
||||
|
||||
// Total table size 1471 bytes (1KiB); checksum: 4CB1CD46
|
@ -0,0 +1,145 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import "golang.org/x/text/internal/language/compact"
|
||||
|
||||
// TODO: Various sets of commonly used tags and regions.
|
||||
|
||||
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
|
||||
// It simplifies safe initialization of Tag values.
|
||||
func MustParse(s string) Tag {
|
||||
t, err := Parse(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
|
||||
// It simplifies safe initialization of Tag values.
|
||||
func (c CanonType) MustParse(s string) Tag {
|
||||
t, err := c.Parse(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
|
||||
// It simplifies safe initialization of Base values.
|
||||
func MustParseBase(s string) Base {
|
||||
b, err := ParseBase(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// MustParseScript is like ParseScript, but panics if the given script cannot be
|
||||
// parsed. It simplifies safe initialization of Script values.
|
||||
func MustParseScript(s string) Script {
|
||||
scr, err := ParseScript(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return scr
|
||||
}
|
||||
|
||||
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
|
||||
// parsed. It simplifies safe initialization of Region values.
|
||||
func MustParseRegion(s string) Region {
|
||||
r, err := ParseRegion(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
var (
|
||||
und = Tag{}
|
||||
|
||||
Und Tag = Tag{}
|
||||
|
||||
Afrikaans Tag = Tag(compact.Afrikaans)
|
||||
Amharic Tag = Tag(compact.Amharic)
|
||||
Arabic Tag = Tag(compact.Arabic)
|
||||
ModernStandardArabic Tag = Tag(compact.ModernStandardArabic)
|
||||
Azerbaijani Tag = Tag(compact.Azerbaijani)
|
||||
Bulgarian Tag = Tag(compact.Bulgarian)
|
||||
Bengali Tag = Tag(compact.Bengali)
|
||||
Catalan Tag = Tag(compact.Catalan)
|
||||
Czech Tag = Tag(compact.Czech)
|
||||
Danish Tag = Tag(compact.Danish)
|
||||
German Tag = Tag(compact.German)
|
||||
Greek Tag = Tag(compact.Greek)
|
||||
English Tag = Tag(compact.English)
|
||||
AmericanEnglish Tag = Tag(compact.AmericanEnglish)
|
||||
BritishEnglish Tag = Tag(compact.BritishEnglish)
|
||||
Spanish Tag = Tag(compact.Spanish)
|
||||
EuropeanSpanish Tag = Tag(compact.EuropeanSpanish)
|
||||
LatinAmericanSpanish Tag = Tag(compact.LatinAmericanSpanish)
|
||||
Estonian Tag = Tag(compact.Estonian)
|
||||
Persian Tag = Tag(compact.Persian)
|
||||
Finnish Tag = Tag(compact.Finnish)
|
||||
Filipino Tag = Tag(compact.Filipino)
|
||||
French Tag = Tag(compact.French)
|
||||
CanadianFrench Tag = Tag(compact.CanadianFrench)
|
||||
Gujarati Tag = Tag(compact.Gujarati)
|
||||
Hebrew Tag = Tag(compact.Hebrew)
|
||||
Hindi Tag = Tag(compact.Hindi)
|
||||
Croatian Tag = Tag(compact.Croatian)
|
||||
Hungarian Tag = Tag(compact.Hungarian)
|
||||
Armenian Tag = Tag(compact.Armenian)
|
||||
Indonesian Tag = Tag(compact.Indonesian)
|
||||
Icelandic Tag = Tag(compact.Icelandic)
|
||||
Italian Tag = Tag(compact.Italian)
|
||||
Japanese Tag = Tag(compact.Japanese)
|
||||
Georgian Tag = Tag(compact.Georgian)
|
||||
Kazakh Tag = Tag(compact.Kazakh)
|
||||
Khmer Tag = Tag(compact.Khmer)
|
||||
Kannada Tag = Tag(compact.Kannada)
|
||||
Korean Tag = Tag(compact.Korean)
|
||||
Kirghiz Tag = Tag(compact.Kirghiz)
|
||||
Lao Tag = Tag(compact.Lao)
|
||||
Lithuanian Tag = Tag(compact.Lithuanian)
|
||||
Latvian Tag = Tag(compact.Latvian)
|
||||
Macedonian Tag = Tag(compact.Macedonian)
|
||||
Malayalam Tag = Tag(compact.Malayalam)
|
||||
Mongolian Tag = Tag(compact.Mongolian)
|
||||
Marathi Tag = Tag(compact.Marathi)
|
||||
Malay Tag = Tag(compact.Malay)
|
||||
Burmese Tag = Tag(compact.Burmese)
|
||||
Nepali Tag = Tag(compact.Nepali)
|
||||
Dutch Tag = Tag(compact.Dutch)
|
||||
Norwegian Tag = Tag(compact.Norwegian)
|
||||
Punjabi Tag = Tag(compact.Punjabi)
|
||||
Polish Tag = Tag(compact.Polish)
|
||||
Portuguese Tag = Tag(compact.Portuguese)
|
||||
BrazilianPortuguese Tag = Tag(compact.BrazilianPortuguese)
|
||||
EuropeanPortuguese Tag = Tag(compact.EuropeanPortuguese)
|
||||
Romanian Tag = Tag(compact.Romanian)
|
||||
Russian Tag = Tag(compact.Russian)
|
||||
Sinhala Tag = Tag(compact.Sinhala)
|
||||
Slovak Tag = Tag(compact.Slovak)
|
||||
Slovenian Tag = Tag(compact.Slovenian)
|
||||
Albanian Tag = Tag(compact.Albanian)
|
||||
Serbian Tag = Tag(compact.Serbian)
|
||||
SerbianLatin Tag = Tag(compact.SerbianLatin)
|
||||
Swedish Tag = Tag(compact.Swedish)
|
||||
Swahili Tag = Tag(compact.Swahili)
|
||||
Tamil Tag = Tag(compact.Tamil)
|
||||
Telugu Tag = Tag(compact.Telugu)
|
||||
Thai Tag = Tag(compact.Thai)
|
||||
Turkish Tag = Tag(compact.Turkish)
|
||||
Ukrainian Tag = Tag(compact.Ukrainian)
|
||||
Urdu Tag = Tag(compact.Urdu)
|
||||
Uzbek Tag = Tag(compact.Uzbek)
|
||||
Vietnamese Tag = Tag(compact.Vietnamese)
|
||||
Chinese Tag = Tag(compact.Chinese)
|
||||
SimplifiedChinese Tag = Tag(compact.SimplifiedChinese)
|
||||
TraditionalChinese Tag = Tag(compact.TraditionalChinese)
|
||||
Zulu Tag = Tag(compact.Zulu)
|
||||
)
|
@ -0,0 +1,36 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package message
|
||||
|
||||
// TODO: some types in this file will need to be made public at some time.
|
||||
// Documentation and method names will reflect this by using the exported name.
|
||||
|
||||
import (
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/message/catalog"
|
||||
)
|
||||
|
||||
// MatchLanguage reports the matched tag obtained from language.MatchStrings for
|
||||
// the Matcher of the DefaultCatalog.
|
||||
func MatchLanguage(preferred ...string) language.Tag {
|
||||
c := DefaultCatalog
|
||||
tag, _ := language.MatchStrings(c.Matcher(), preferred...)
|
||||
return tag
|
||||
}
|
||||
|
||||
// DefaultCatalog is used by SetString.
|
||||
var DefaultCatalog catalog.Catalog = defaultCatalog
|
||||
|
||||
var defaultCatalog = catalog.NewBuilder()
|
||||
|
||||
// SetString calls SetString on the initial default Catalog.
|
||||
func SetString(tag language.Tag, key string, msg string) error {
|
||||
return defaultCatalog.SetString(tag, key, msg)
|
||||
}
|
||||
|
||||
// Set calls Set on the initial default Catalog.
|
||||
func Set(tag language.Tag, key string, msg ...catalog.Message) error {
|
||||
return defaultCatalog.Set(tag, key, msg...)
|
||||
}
|
@ -0,0 +1,369 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package catalog defines collections of translated format strings.
|
||||
//
|
||||
// This package mostly defines types for populating catalogs with messages. The
|
||||
// catmsg package contains further definitions for creating custom message and
|
||||
// dictionary types as well as packages that use Catalogs.
|
||||
//
|
||||
// Package catalog defines various interfaces: Dictionary, Loader, and Message.
|
||||
// A Dictionary maintains a set of translations of format strings for a single
|
||||
// language. The Loader interface defines a source of dictionaries. A
|
||||
// translation of a format string is represented by a Message.
|
||||
//
|
||||
//
|
||||
// Catalogs
|
||||
//
|
||||
// A Catalog defines a programmatic interface for setting message translations.
|
||||
// It maintains a set of per-language dictionaries with translations for a set
|
||||
// of keys. For message translation to function properly, a translation should
|
||||
// be defined for each key for each supported language. A dictionary may be
|
||||
// underspecified, though, if there is a parent language that already defines
|
||||
// the key. For example, a Dictionary for "en-GB" could leave out entries that
|
||||
// are identical to those in a dictionary for "en".
|
||||
//
|
||||
//
|
||||
// Messages
|
||||
//
|
||||
// A Message is a format string which varies on the value of substitution
|
||||
// variables. For instance, to indicate the number of results one could want "no
|
||||
// results" if there are none, "1 result" if there is 1, and "%d results" for
|
||||
// any other number. Catalog is agnostic to the kind of format strings that are
|
||||
// used: for instance, messages can follow either the printf-style substitution
|
||||
// from package fmt or use templates.
|
||||
//
|
||||
// A Message does not substitute arguments in the format string. This job is
|
||||
// reserved for packages that render strings, such as message, that use Catalogs
|
||||
// to select strings. This separation of concerns allows Catalog to be used to
|
||||
// store any kind of formatting strings.
|
||||
//
|
||||
//
|
||||
// Selecting messages based on linguistic features of substitution arguments
|
||||
//
|
||||
// Messages may vary based on any linguistic features of the argument values.
|
||||
// The most common one is plural form, but others exist.
|
||||
//
|
||||
// Selection messages are provided in packages that provide support for a
|
||||
// specific linguistic feature. The following snippet uses plural.Select:
|
||||
//
|
||||
// catalog.Set(language.English, "You are %d minute(s) late.",
|
||||
// plural.Select(1,
|
||||
// "one", "You are 1 minute late.",
|
||||
// "other", "You are %d minutes late."))
|
||||
//
|
||||
// In this example, a message is stored in the Catalog where one of two messages
|
||||
// is selected based on the first argument, a number. The first message is
|
||||
// selected if the argument is singular (identified by the selector "one") and
|
||||
// the second message is selected in all other cases. The selectors are defined
|
||||
// by the plural rules defined in CLDR. The selector "other" is special and will
|
||||
// always match. Each language always defines one of the linguistic categories
|
||||
// to be "other." For English, singular is "one" and plural is "other".
|
||||
//
|
||||
// Selects can be nested. This allows selecting sentences based on features of
|
||||
// multiple arguments or multiple linguistic properties of a single argument.
|
||||
//
|
||||
//
|
||||
// String interpolation
|
||||
//
|
||||
// There is often a lot of commonality between the possible variants of a
|
||||
// message. For instance, in the example above the word "minute" varies based on
|
||||
// the plural category of the argument, but the rest of the sentence is
|
||||
// identical. Using interpolation the above message can be rewritten as:
|
||||
//
|
||||
// catalog.Set(language.English, "You are %d minute(s) late.",
|
||||
// catalog.Var("minutes",
|
||||
// plural.Select(1, "one", "minute", "other", "minutes")),
|
||||
// catalog.String("You are %[1]d ${minutes} late."))
|
||||
//
|
||||
// Var is defined to return the variable name if the message does not yield a
|
||||
// match. This allows us to further simplify this snippet to
|
||||
//
|
||||
// catalog.Set(language.English, "You are %d minute(s) late.",
|
||||
// catalog.Var("minutes", plural.Select(1, "one", "minute")),
|
||||
// catalog.String("You are %d ${minutes} late."))
|
||||
//
|
||||
// Overall this is still only a minor improvement, but things can get a lot more
|
||||
// unwieldy if more than one linguistic feature is used to determine a message
|
||||
// variant. Consider the following example:
|
||||
//
|
||||
// // argument 1: list of hosts, argument 2: list of guests
|
||||
// catalog.Set(language.English, "%[1]v invite(s) %[2]v to their party.",
|
||||
// catalog.Var("their",
|
||||
// plural.Select(1,
|
||||
// "one", gender.Select(1, "female", "her", "other", "his"))),
|
||||
// catalog.Var("invites", plural.Select(1, "one", "invite")),
|
||||
// catalog.String("%[1]v ${invites} %[2]v to ${their} party."))
|
||||
//
|
||||
// Without variable substitution, this would have to be written as
|
||||
//
|
||||
// // argument 1: list of hosts, argument 2: list of guests
|
||||
// catalog.Set(language.English, "%[1]v invite(s) %[2]v to their party.",
|
||||
// plural.Select(1,
|
||||
// "one", gender.Select(1,
|
||||
// "female", "%[1]v invites %[2]v to her party.",
|
||||
// "other", "%[1]v invites %[2]v to his party."),
|
||||
// "other", "%[1]v invites %[2]v to their party."))
|
||||
//
|
||||
// Not necessarily shorter, but using variables there is less duplication and
|
||||
// the messages are more maintenance friendly. Moreover, languages may have up
|
||||
// to six plural forms. This makes the use of variables more welcome.
|
||||
//
|
||||
// Different messages using the same inflections can reuse variables by moving
|
||||
// them to macros. Using macros we can rewrite the message as:
|
||||
//
|
||||
// // argument 1: list of hosts, argument 2: list of guests
|
||||
// catalog.SetString(language.English, "%[1]v invite(s) %[2]v to their party.",
|
||||
// "%[1]v ${invites(1)} %[2]v to ${their(1)} party.")
|
||||
//
|
||||
// Where the following macros were defined separately.
|
||||
//
|
||||
// catalog.SetMacro(language.English, "invites", plural.Select(1, "one", "invite"))
|
||||
// catalog.SetMacro(language.English, "their", plural.Select(1,
|
||||
// "one", gender.Select(1, "female", "her", "other", "his"))),
|
||||
//
|
||||
// Placeholders use parentheses and the arguments to invoke a macro.
|
||||
//
|
||||
//
|
||||
// Looking up messages
|
||||
//
|
||||
// Message lookup using Catalogs is typically only done by specialized packages
|
||||
// and is not something the user should be concerned with. For instance, to
|
||||
// express the tardiness of a user using the related message we defined earlier,
|
||||
// the user may use the package message like so:
|
||||
//
|
||||
// p := message.NewPrinter(language.English)
|
||||
// p.Printf("You are %d minute(s) late.", 5)
|
||||
//
|
||||
// Which would print:
|
||||
// You are 5 minutes late.
|
||||
//
|
||||
//
|
||||
// This package is UNDER CONSTRUCTION and its API may change.
|
||||
package catalog // import "golang.org/x/text/message/catalog"
|
||||
|
||||
// TODO:
|
||||
// Some way to freeze a catalog.
|
||||
// - Locking on each lookup turns out to be about 50% of the total running time
|
||||
// for some of the benchmarks in the message package.
|
||||
// Consider these:
|
||||
// - Sequence type to support sequences in user-defined messages.
|
||||
// - Garbage collection: Remove dictionaries that can no longer be reached
|
||||
// as other dictionaries have been added that cover all possible keys.
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/text/internal"
|
||||
|
||||
"golang.org/x/text/internal/catmsg"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// A Catalog allows lookup of translated messages.
|
||||
type Catalog interface {
|
||||
// Languages returns all languages for which the Catalog contains variants.
|
||||
Languages() []language.Tag
|
||||
|
||||
// Matcher returns a Matcher for languages from this Catalog.
|
||||
Matcher() language.Matcher
|
||||
|
||||
// A Context is used for evaluating Messages.
|
||||
Context(tag language.Tag, r catmsg.Renderer) *Context
|
||||
|
||||
// This method also makes Catalog a private interface.
|
||||
lookup(tag language.Tag, key string) (data string, ok bool)
|
||||
}
|
||||
|
||||
// NewFromMap creates a Catalog from the given map. If a Dictionary is
|
||||
// underspecified the entry is retrieved from a parent language.
|
||||
func NewFromMap(dictionaries map[string]Dictionary, opts ...Option) (Catalog, error) {
|
||||
options := options{}
|
||||
for _, o := range opts {
|
||||
o(&options)
|
||||
}
|
||||
c := &catalog{
|
||||
dicts: map[language.Tag]Dictionary{},
|
||||
}
|
||||
_, hasFallback := dictionaries[options.fallback.String()]
|
||||
if hasFallback {
|
||||
// TODO: Should it be okay to not have a fallback language?
|
||||
// Catalog generators could enforce there is always a fallback.
|
||||
c.langs = append(c.langs, options.fallback)
|
||||
}
|
||||
for lang, dict := range dictionaries {
|
||||
tag, err := language.Parse(lang)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("catalog: invalid language tag %q", lang)
|
||||
}
|
||||
if _, ok := c.dicts[tag]; ok {
|
||||
return nil, fmt.Errorf("catalog: duplicate entry for tag %q after normalization", tag)
|
||||
}
|
||||
c.dicts[tag] = dict
|
||||
if !hasFallback || tag != options.fallback {
|
||||
c.langs = append(c.langs, tag)
|
||||
}
|
||||
}
|
||||
if hasFallback {
|
||||
internal.SortTags(c.langs[1:])
|
||||
} else {
|
||||
internal.SortTags(c.langs)
|
||||
}
|
||||
c.matcher = language.NewMatcher(c.langs)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// A Dictionary supplies the translations for one language.
type Dictionary interface {
	// Lookup returns the message for key, in the form produced by
	// catmsg.Compile. ok is false if the Dictionary has no such message.
	Lookup(key string) (data string, ok bool)
}
|
||||
|
||||
type catalog struct {
|
||||
langs []language.Tag
|
||||
dicts map[language.Tag]Dictionary
|
||||
macros store
|
||||
matcher language.Matcher
|
||||
}
|
||||
|
||||
func (c *catalog) Languages() []language.Tag { return c.langs }
|
||||
func (c *catalog) Matcher() language.Matcher { return c.matcher }
|
||||
|
||||
func (c *catalog) lookup(tag language.Tag, key string) (data string, ok bool) {
|
||||
for ; ; tag = tag.Parent() {
|
||||
if dict, ok := c.dicts[tag]; ok {
|
||||
if data, ok := dict.Lookup(key); ok {
|
||||
return data, true
|
||||
}
|
||||
}
|
||||
if tag == language.Und {
|
||||
break
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Context returns a Context for formatting messages.
|
||||
// Only one Message may be formatted per context at any given time.
|
||||
func (c *catalog) Context(tag language.Tag, r catmsg.Renderer) *Context {
|
||||
return &Context{
|
||||
cat: c,
|
||||
tag: tag,
|
||||
dec: catmsg.NewDecoder(tag, r, &dict{&c.macros, tag}),
|
||||
}
|
||||
}
|
||||
|
||||
// A Builder allows building a Catalog programmatically.
|
||||
type Builder struct {
|
||||
options
|
||||
matcher language.Matcher
|
||||
|
||||
index store
|
||||
macros store
|
||||
}
|
||||
|
||||
type options struct {
|
||||
fallback language.Tag
|
||||
}
|
||||
|
||||
// An Option configures Catalog behavior.
|
||||
type Option func(*options)
|
||||
|
||||
// Fallback specifies the default fallback language. The default is Und.
|
||||
func Fallback(tag language.Tag) Option {
|
||||
return func(o *options) { o.fallback = tag }
|
||||
}
|
||||
|
||||
// TODO:
|
||||
// // Catalogs specifies one or more sources for a Catalog.
|
||||
// // Lookups are in order.
|
||||
// // This can be changed inserting a Catalog used for setting, which implements
|
||||
// // Loader, used for setting in the chain.
|
||||
// func Catalogs(d ...Loader) Option {
|
||||
// return nil
|
||||
// }
|
||||
//
|
||||
// func Delims(start, end string) Option {}
|
||||
//
|
||||
// func Dict(tag language.Tag, d ...Dictionary) Option
|
||||
|
||||
// NewBuilder returns an empty mutable Catalog.
|
||||
func NewBuilder(opts ...Option) *Builder {
|
||||
c := &Builder{}
|
||||
for _, o := range opts {
|
||||
o(&c.options)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// SetString is shorthand for Set(tag, key, String(msg)).
|
||||
func (c *Builder) SetString(tag language.Tag, key string, msg string) error {
|
||||
return c.set(tag, key, &c.index, String(msg))
|
||||
}
|
||||
|
||||
// Set sets the translation for the given language and key.
|
||||
//
|
||||
// When evaluation this message, the first Message in the sequence to msgs to
|
||||
// evaluate to a string will be the message returned.
|
||||
func (c *Builder) Set(tag language.Tag, key string, msg ...Message) error {
|
||||
return c.set(tag, key, &c.index, msg...)
|
||||
}
|
||||
|
||||
// SetMacro defines a Message that may be substituted in another message.
|
||||
// The arguments to a macro Message are passed as arguments in the
|
||||
// placeholder the form "${foo(arg1, arg2)}".
|
||||
func (c *Builder) SetMacro(tag language.Tag, name string, msg ...Message) error {
|
||||
return c.set(tag, name, &c.macros, msg...)
|
||||
}
|
||||
|
||||
// ErrNotFound indicates there was no message for the given key.
|
||||
var ErrNotFound = errors.New("catalog: message not found") // returned by Context.Execute when the lookup finds no message for the key
|
||||
|
||||
// String specifies a plain message string. It can be used as fallback if no
|
||||
// other strings match or as a simple standalone message.
|
||||
//
|
||||
// It is an error to pass more than one String in a message sequence.
|
||||
func String(name string) Message {
|
||||
return catmsg.String(name)
|
||||
}
|
||||
|
||||
// Var sets a variable that may be substituted in formatting patterns using
|
||||
// named substitution of the form "${name}". The name argument is used as a
|
||||
// fallback if the statements do not produce a match. The statement sequence may
|
||||
// not contain any Var calls.
|
||||
//
|
||||
// The name passed to a Var must be unique within message sequence.
|
||||
func Var(name string, msg ...Message) Message {
|
||||
return &catmsg.Var{Name: name, Message: firstInSequence(msg)}
|
||||
}
|
||||
|
||||
// Context returns a Context for formatting messages.
|
||||
// Only one Message may be formatted per context at any given time.
|
||||
func (b *Builder) Context(tag language.Tag, r catmsg.Renderer) *Context {
|
||||
return &Context{
|
||||
cat: b,
|
||||
tag: tag,
|
||||
dec: catmsg.NewDecoder(tag, r, &dict{&b.macros, tag}),
|
||||
}
|
||||
}
|
||||
|
||||
// A Context is used for evaluating Messages.
|
||||
// Only one Message may be formatted per context at any given time.
|
||||
type Context struct {
|
||||
cat Catalog
|
||||
tag language.Tag // TODO: use compact index.
|
||||
dec *catmsg.Decoder
|
||||
}
|
||||
|
||||
// Execute looks up and executes the message with the given key.
|
||||
// It returns ErrNotFound if no message could be found in the index.
|
||||
func (c *Context) Execute(key string) error {
|
||||
data, ok := c.cat.lookup(c.tag, key)
|
||||
if !ok {
|
||||
return ErrNotFound
|
||||
}
|
||||
return c.dec.Execute(data)
|
||||
}
|
@ -0,0 +1,129 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package catalog
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"golang.org/x/text/internal"
|
||||
"golang.org/x/text/internal/catmsg"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// TODO:
|
||||
// Dictionary returns a Dictionary that returns the first Message, using the
|
||||
// given language tag, that matches:
|
||||
// 1. the last one registered by one of the Set methods
|
||||
// 2. returned by one of the Loaders
|
||||
// 3. repeat from 1. using the parent language
|
||||
// This approach allows messages to be underspecified.
|
||||
// func (c *Catalog) Dictionary(tag language.Tag) (Dictionary, error) {
|
||||
// // TODO: verify dictionary exists.
|
||||
// return &dict{&c.index, tag}, nil
|
||||
// }
|
||||
|
||||
type dict struct {
|
||||
s *store
|
||||
tag language.Tag // TODO: make compact tag.
|
||||
}
|
||||
|
||||
func (d *dict) Lookup(key string) (data string, ok bool) {
|
||||
return d.s.lookup(d.tag, key)
|
||||
}
|
||||
|
||||
func (b *Builder) lookup(tag language.Tag, key string) (data string, ok bool) {
|
||||
return b.index.lookup(tag, key)
|
||||
}
|
||||
|
||||
func (c *Builder) set(tag language.Tag, key string, s *store, msg ...Message) error {
|
||||
data, err := catmsg.Compile(tag, &dict{&c.macros, tag}, firstInSequence(msg))
|
||||
|
||||
s.mutex.Lock()
|
||||
defer s.mutex.Unlock()
|
||||
|
||||
m := s.index[tag]
|
||||
if m == nil {
|
||||
m = msgMap{}
|
||||
if s.index == nil {
|
||||
s.index = map[language.Tag]msgMap{}
|
||||
}
|
||||
c.matcher = nil
|
||||
s.index[tag] = m
|
||||
}
|
||||
|
||||
m[key] = data
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Builder) Matcher() language.Matcher {
|
||||
c.index.mutex.RLock()
|
||||
m := c.matcher
|
||||
c.index.mutex.RUnlock()
|
||||
if m != nil {
|
||||
return m
|
||||
}
|
||||
|
||||
c.index.mutex.Lock()
|
||||
if c.matcher == nil {
|
||||
c.matcher = language.NewMatcher(c.unlockedLanguages())
|
||||
}
|
||||
m = c.matcher
|
||||
c.index.mutex.Unlock()
|
||||
return m
|
||||
}
|
||||
|
||||
type store struct {
|
||||
mutex sync.RWMutex
|
||||
index map[language.Tag]msgMap
|
||||
}
|
||||
|
||||
// msgMap maps a message key to the compiled data stored for it.
type msgMap map[string]string
|
||||
|
||||
func (s *store) lookup(tag language.Tag, key string) (data string, ok bool) {
|
||||
s.mutex.RLock()
|
||||
defer s.mutex.RUnlock()
|
||||
|
||||
for ; ; tag = tag.Parent() {
|
||||
if msgs, ok := s.index[tag]; ok {
|
||||
if msg, ok := msgs[key]; ok {
|
||||
return msg, true
|
||||
}
|
||||
}
|
||||
if tag == language.Und {
|
||||
break
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Languages returns all languages for which the Catalog contains variants.
|
||||
func (b *Builder) Languages() []language.Tag {
|
||||
s := &b.index
|
||||
s.mutex.RLock()
|
||||
defer s.mutex.RUnlock()
|
||||
|
||||
return b.unlockedLanguages()
|
||||
}
|
||||
|
||||
func (b *Builder) unlockedLanguages() []language.Tag {
|
||||
s := &b.index
|
||||
if len(s.index) == 0 {
|
||||
return nil
|
||||
}
|
||||
tags := make([]language.Tag, 0, len(s.index))
|
||||
_, hasFallback := s.index[b.options.fallback]
|
||||
offset := 0
|
||||
if hasFallback {
|
||||
tags = append(tags, b.options.fallback)
|
||||
offset = 1
|
||||
}
|
||||
for t := range s.index {
|
||||
if t != b.options.fallback {
|
||||
tags = append(tags, t)
|
||||
}
|
||||
}
|
||||
internal.SortTags(tags[offset:])
|
||||
return tags
|
||||
}
|
@ -0,0 +1,15 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build go1.9
|
||||
|
||||
package catalog
|
||||
|
||||
import "golang.org/x/text/internal/catmsg"
|
||||
|
||||
// A Message holds a collection of translations for the same phrase that may
|
||||
// vary based on the values of substitution arguments.
|
||||
type Message = catmsg.Message
|
||||
|
||||
type firstInSequence = catmsg.FirstOf
|
@ -0,0 +1,23 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !go1.9
|
||||
|
||||
package catalog
|
||||
|
||||
import "golang.org/x/text/internal/catmsg"
|
||||
|
||||
// A Message holds a collection of translations for the same phrase that may
|
||||
// vary based on the values of substitution arguments.
|
||||
type Message interface {
|
||||
catmsg.Message
|
||||
}
|
||||
|
||||
func firstInSequence(m []Message) catmsg.Message {
|
||||
a := []catmsg.Message{}
|
||||
for _, m := range m {
|
||||
a = append(a, m)
|
||||
}
|
||||
return catmsg.FirstOf(a)
|
||||
}
|
@ -0,0 +1,101 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package message implements formatted I/O for localized strings with functions
|
||||
// analogous to fmt's print functions. It is a drop-in replacement for fmt.
|
||||
//
|
||||
//
|
||||
// Localized Formatting
|
||||
//
|
||||
// A format string can be localized by replacing any of the print functions of
|
||||
// fmt with an equivalent call to a Printer.
|
||||
//
|
||||
// p := message.NewPrinter(message.MatchLanguage("en"))
|
||||
// p.Println(123456.78) // Prints 123,456.78
|
||||
//
|
||||
// p.Printf("%d ducks in a row", 4331) // Prints 4,331 ducks in a row
|
||||
//
|
||||
// p := message.NewPrinter(message.MatchLanguage("nl"))
|
||||
// p.Printf("Hoogte: %.1f meter", 1244.9) // Prints Hoogte: 1.244,9 meter
|
||||
//
|
||||
// p := message.NewPrinter(message.MatchLanguage("bn"))
|
||||
// p.Println(123456.78) // Prints ১,২৩,৪৫৬.৭৮
|
||||
//
|
||||
// Printer currently supports numbers and specialized types for which packages
|
||||
// exist in x/text. Other builtin types such as time.Time and slices are
|
||||
// planned.
|
||||
//
|
||||
// Format strings largely have the same meaning as with fmt with the following
|
||||
// notable exceptions:
|
||||
// - flag # always resorts to fmt for printing
|
||||
// - verb 'f', 'e', 'g', 'd' use localized formatting unless the '#' flag is
|
||||
// specified.
|
||||
// - verb 'm' inserts a translation of a string argument.
|
||||
//
|
||||
// See package fmt for more options.
|
||||
//
|
||||
//
|
||||
// Translation
|
||||
//
|
||||
// The format strings that are passed to Printf, Sprintf, Fprintf, or Errorf
|
||||
// are used as keys to look up translations for the specified languages.
|
||||
// More on how these need to be specified below.
|
||||
//
|
||||
// One can use arbitrary keys to distinguish between otherwise ambiguous
|
||||
// strings:
|
||||
// p := message.NewPrinter(language.English)
|
||||
// p.Printf("archive(noun)") // Prints "archive"
|
||||
// p.Printf("archive(verb)") // Prints "archive"
|
||||
//
|
||||
// p := message.NewPrinter(language.German)
|
||||
// p.Printf("archive(noun)") // Prints "Archiv"
|
||||
// p.Printf("archive(verb)") // Prints "archivieren"
|
||||
//
|
||||
// To retain the fallback functionality, use Key:
|
||||
// p.Printf(message.Key("archive(noun)", "archive"))
|
||||
// p.Printf(message.Key("archive(verb)", "archive"))
|
||||
//
|
||||
//
|
||||
// Translation Pipeline
|
||||
//
|
||||
// Format strings that contain text need to be translated to support different
|
||||
// locales. The first step is to extract strings that need to be translated.
|
||||
//
|
||||
// 1. Install gotext
|
||||
// go get -u golang.org/x/text/cmd/gotext
|
||||
// gotext -help
|
||||
//
|
||||
// 2. Mark strings in your source to be translated by using message.Printer,
|
||||
// instead of the functions of the fmt package.
|
||||
//
|
||||
// 3. Extract the strings from your source
|
||||
//
|
||||
// gotext extract
|
||||
//
|
||||
// The output will be written to the textdata directory.
|
||||
//
|
||||
// 4. Send the files for translation
|
||||
//
|
||||
// It is planned to support multiple formats, but for now one will have to
|
||||
// rewrite the JSON output to the desired format.
|
||||
//
|
||||
// 5. Inject translations into program
|
||||
//
|
||||
// 6. Repeat from 2
|
||||
//
|
||||
// Right now this has to be done programmatically with calls to Set or
|
||||
// SetString. These functions, as well as the methods defined in package
|
||||
// golang.org/x/text/message/catalog, can be used to implement
|
||||
// either dynamic or static loading of messages.
|
||||
//
|
||||
//
|
||||
// Plural and Gender Forms
|
||||
//
|
||||
// Translated messages can vary based on the plural and gender forms of
|
||||
// substitution values. In general, it is up to the translators to provide
|
||||
// alternative translations for such forms. See the packages in
|
||||
// golang.org/x/text/feature and golang.org/x/text/message/catalog for more
|
||||
// information.
|
||||
//
|
||||
package message
|
@ -0,0 +1,510 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package message
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strconv"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal/format"
|
||||
)
|
||||
|
||||
const (
	// ldigits and udigits are the lower- and upper-case digit alphabets.
	// Index 16 holds the letter that follows '0' in a hexadecimal prefix.
	ldigits = "0123456789abcdefx"
	udigits = "0123456789ABCDEFX"

	// signed and unsigned are readable values for boolean "is signed"
	// arguments.
	signed   = true
	unsigned = false
)
|
||||
|
||||
// A formatInfo is the raw formatter used by Printf etc.
|
||||
// It prints into a buffer that must be set up separately.
|
||||
type formatInfo struct {
|
||||
buf *bytes.Buffer
|
||||
|
||||
format.Parser
|
||||
|
||||
// intbuf is large enough to store %b of an int64 with a sign and
|
||||
// avoids padding at the end of the struct on 32 bit architectures.
|
||||
intbuf [68]byte
|
||||
}
|
||||
|
||||
func (f *formatInfo) init(buf *bytes.Buffer) {
|
||||
f.ClearFlags()
|
||||
f.buf = buf
|
||||
}
|
||||
|
||||
// writePadding generates n bytes of padding.
|
||||
func (f *formatInfo) writePadding(n int) {
|
||||
if n <= 0 { // No padding bytes needed.
|
||||
return
|
||||
}
|
||||
f.buf.Grow(n)
|
||||
// Decide which byte the padding should be filled with.
|
||||
padByte := byte(' ')
|
||||
if f.Zero {
|
||||
padByte = byte('0')
|
||||
}
|
||||
// Fill padding with padByte.
|
||||
for i := 0; i < n; i++ {
|
||||
f.buf.WriteByte(padByte) // TODO: make more efficient.
|
||||
}
|
||||
}
|
||||
|
||||
// pad appends b to f.buf, padded on left (!f.minus) or right (f.minus).
|
||||
func (f *formatInfo) pad(b []byte) {
|
||||
if !f.WidthPresent || f.Width == 0 {
|
||||
f.buf.Write(b)
|
||||
return
|
||||
}
|
||||
width := f.Width - utf8.RuneCount(b)
|
||||
if !f.Minus {
|
||||
// left padding
|
||||
f.writePadding(width)
|
||||
f.buf.Write(b)
|
||||
} else {
|
||||
// right padding
|
||||
f.buf.Write(b)
|
||||
f.writePadding(width)
|
||||
}
|
||||
}
|
||||
|
||||
// padString appends s to f.buf, padded on left (!f.minus) or right (f.minus).
|
||||
func (f *formatInfo) padString(s string) {
|
||||
if !f.WidthPresent || f.Width == 0 {
|
||||
f.buf.WriteString(s)
|
||||
return
|
||||
}
|
||||
width := f.Width - utf8.RuneCountInString(s)
|
||||
if !f.Minus {
|
||||
// left padding
|
||||
f.writePadding(width)
|
||||
f.buf.WriteString(s)
|
||||
} else {
|
||||
// right padding
|
||||
f.buf.WriteString(s)
|
||||
f.writePadding(width)
|
||||
}
|
||||
}
|
||||
|
||||
// fmt_boolean formats a boolean.
|
||||
func (f *formatInfo) fmt_boolean(v bool) {
|
||||
if v {
|
||||
f.padString("true")
|
||||
} else {
|
||||
f.padString("false")
|
||||
}
|
||||
}
|
||||
|
||||
// fmt_unicode formats a uint64 as "U+0078" or with f.sharp set as "U+0078 'x'".
|
||||
func (f *formatInfo) fmt_unicode(u uint64) {
|
||||
buf := f.intbuf[0:]
|
||||
|
||||
// With default precision set the maximum needed buf length is 18
|
||||
// for formatting -1 with %#U ("U+FFFFFFFFFFFFFFFF") which fits
|
||||
// into the already allocated intbuf with a capacity of 68 bytes.
|
||||
prec := 4
|
||||
if f.PrecPresent && f.Prec > 4 {
|
||||
prec = f.Prec
|
||||
// Compute space needed for "U+" , number, " '", character, "'".
|
||||
width := 2 + prec + 2 + utf8.UTFMax + 1
|
||||
if width > len(buf) {
|
||||
buf = make([]byte, width)
|
||||
}
|
||||
}
|
||||
|
||||
// Format into buf, ending at buf[i]. Formatting numbers is easier right-to-left.
|
||||
i := len(buf)
|
||||
|
||||
// For %#U we want to add a space and a quoted character at the end of the buffer.
|
||||
if f.Sharp && u <= utf8.MaxRune && strconv.IsPrint(rune(u)) {
|
||||
i--
|
||||
buf[i] = '\''
|
||||
i -= utf8.RuneLen(rune(u))
|
||||
utf8.EncodeRune(buf[i:], rune(u))
|
||||
i--
|
||||
buf[i] = '\''
|
||||
i--
|
||||
buf[i] = ' '
|
||||
}
|
||||
// Format the Unicode code point u as a hexadecimal number.
|
||||
for u >= 16 {
|
||||
i--
|
||||
buf[i] = udigits[u&0xF]
|
||||
prec--
|
||||
u >>= 4
|
||||
}
|
||||
i--
|
||||
buf[i] = udigits[u]
|
||||
prec--
|
||||
// Add zeros in front of the number until requested precision is reached.
|
||||
for prec > 0 {
|
||||
i--
|
||||
buf[i] = '0'
|
||||
prec--
|
||||
}
|
||||
// Add a leading "U+".
|
||||
i--
|
||||
buf[i] = '+'
|
||||
i--
|
||||
buf[i] = 'U'
|
||||
|
||||
oldZero := f.Zero
|
||||
f.Zero = false
|
||||
f.pad(buf[i:])
|
||||
f.Zero = oldZero
|
||||
}
|
||||
|
||||
// fmt_integer formats signed and unsigned integers.
|
||||
func (f *formatInfo) fmt_integer(u uint64, base int, isSigned bool, digits string) {
|
||||
negative := isSigned && int64(u) < 0
|
||||
if negative {
|
||||
u = -u
|
||||
}
|
||||
|
||||
buf := f.intbuf[0:]
|
||||
// The already allocated f.intbuf with a capacity of 68 bytes
|
||||
// is large enough for integer formatting when no precision or width is set.
|
||||
if f.WidthPresent || f.PrecPresent {
|
||||
// Account 3 extra bytes for possible addition of a sign and "0x".
|
||||
width := 3 + f.Width + f.Prec // wid and prec are always positive.
|
||||
if width > len(buf) {
|
||||
// We're going to need a bigger boat.
|
||||
buf = make([]byte, width)
|
||||
}
|
||||
}
|
||||
|
||||
// Two ways to ask for extra leading zero digits: %.3d or %03d.
|
||||
// If both are specified the f.zero flag is ignored and
|
||||
// padding with spaces is used instead.
|
||||
prec := 0
|
||||
if f.PrecPresent {
|
||||
prec = f.Prec
|
||||
// Precision of 0 and value of 0 means "print nothing" but padding.
|
||||
if prec == 0 && u == 0 {
|
||||
oldZero := f.Zero
|
||||
f.Zero = false
|
||||
f.writePadding(f.Width)
|
||||
f.Zero = oldZero
|
||||
return
|
||||
}
|
||||
} else if f.Zero && f.WidthPresent {
|
||||
prec = f.Width
|
||||
if negative || f.Plus || f.Space {
|
||||
prec-- // leave room for sign
|
||||
}
|
||||
}
|
||||
|
||||
// Because printing is easier right-to-left: format u into buf, ending at buf[i].
|
||||
// We could make things marginally faster by splitting the 32-bit case out
|
||||
// into a separate block but it's not worth the duplication, so u has 64 bits.
|
||||
i := len(buf)
|
||||
// Use constants for the division and modulo for more efficient code.
|
||||
// Switch cases ordered by popularity.
|
||||
switch base {
|
||||
case 10:
|
||||
for u >= 10 {
|
||||
i--
|
||||
next := u / 10
|
||||
buf[i] = byte('0' + u - next*10)
|
||||
u = next
|
||||
}
|
||||
case 16:
|
||||
for u >= 16 {
|
||||
i--
|
||||
buf[i] = digits[u&0xF]
|
||||
u >>= 4
|
||||
}
|
||||
case 8:
|
||||
for u >= 8 {
|
||||
i--
|
||||
buf[i] = byte('0' + u&7)
|
||||
u >>= 3
|
||||
}
|
||||
case 2:
|
||||
for u >= 2 {
|
||||
i--
|
||||
buf[i] = byte('0' + u&1)
|
||||
u >>= 1
|
||||
}
|
||||
default:
|
||||
panic("fmt: unknown base; can't happen")
|
||||
}
|
||||
i--
|
||||
buf[i] = digits[u]
|
||||
for i > 0 && prec > len(buf)-i {
|
||||
i--
|
||||
buf[i] = '0'
|
||||
}
|
||||
|
||||
// Various prefixes: 0x, -, etc.
|
||||
if f.Sharp {
|
||||
switch base {
|
||||
case 8:
|
||||
if buf[i] != '0' {
|
||||
i--
|
||||
buf[i] = '0'
|
||||
}
|
||||
case 16:
|
||||
// Add a leading 0x or 0X.
|
||||
i--
|
||||
buf[i] = digits[16]
|
||||
i--
|
||||
buf[i] = '0'
|
||||
}
|
||||
}
|
||||
|
||||
if negative {
|
||||
i--
|
||||
buf[i] = '-'
|
||||
} else if f.Plus {
|
||||
i--
|
||||
buf[i] = '+'
|
||||
} else if f.Space {
|
||||
i--
|
||||
buf[i] = ' '
|
||||
}
|
||||
|
||||
// Left padding with zeros has already been handled like precision earlier
|
||||
// or the f.zero flag is ignored due to an explicitly set precision.
|
||||
oldZero := f.Zero
|
||||
f.Zero = false
|
||||
f.pad(buf[i:])
|
||||
f.Zero = oldZero
|
||||
}
|
||||
|
||||
// truncate truncates the string to the specified precision, if present.
|
||||
func (f *formatInfo) truncate(s string) string {
|
||||
if f.PrecPresent {
|
||||
n := f.Prec
|
||||
for i := range s {
|
||||
n--
|
||||
if n < 0 {
|
||||
return s[:i]
|
||||
}
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// fmt_s formats a string.
|
||||
func (f *formatInfo) fmt_s(s string) {
|
||||
s = f.truncate(s)
|
||||
f.padString(s)
|
||||
}
|
||||
|
||||
// fmt_sbx formats a string or byte slice as a hexadecimal encoding of its bytes.
|
||||
func (f *formatInfo) fmt_sbx(s string, b []byte, digits string) {
|
||||
length := len(b)
|
||||
if b == nil {
|
||||
// No byte slice present. Assume string s should be encoded.
|
||||
length = len(s)
|
||||
}
|
||||
// Set length to not process more bytes than the precision demands.
|
||||
if f.PrecPresent && f.Prec < length {
|
||||
length = f.Prec
|
||||
}
|
||||
// Compute width of the encoding taking into account the f.sharp and f.space flag.
|
||||
width := 2 * length
|
||||
if width > 0 {
|
||||
if f.Space {
|
||||
// Each element encoded by two hexadecimals will get a leading 0x or 0X.
|
||||
if f.Sharp {
|
||||
width *= 2
|
||||
}
|
||||
// Elements will be separated by a space.
|
||||
width += length - 1
|
||||
} else if f.Sharp {
|
||||
// Only a leading 0x or 0X will be added for the whole string.
|
||||
width += 2
|
||||
}
|
||||
} else { // The byte slice or string that should be encoded is empty.
|
||||
if f.WidthPresent {
|
||||
f.writePadding(f.Width)
|
||||
}
|
||||
return
|
||||
}
|
||||
// Handle padding to the left.
|
||||
if f.WidthPresent && f.Width > width && !f.Minus {
|
||||
f.writePadding(f.Width - width)
|
||||
}
|
||||
// Write the encoding directly into the output buffer.
|
||||
buf := f.buf
|
||||
if f.Sharp {
|
||||
// Add leading 0x or 0X.
|
||||
buf.WriteByte('0')
|
||||
buf.WriteByte(digits[16])
|
||||
}
|
||||
var c byte
|
||||
for i := 0; i < length; i++ {
|
||||
if f.Space && i > 0 {
|
||||
// Separate elements with a space.
|
||||
buf.WriteByte(' ')
|
||||
if f.Sharp {
|
||||
// Add leading 0x or 0X for each element.
|
||||
buf.WriteByte('0')
|
||||
buf.WriteByte(digits[16])
|
||||
}
|
||||
}
|
||||
if b != nil {
|
||||
c = b[i] // Take a byte from the input byte slice.
|
||||
} else {
|
||||
c = s[i] // Take a byte from the input string.
|
||||
}
|
||||
// Encode each byte as two hexadecimal digits.
|
||||
buf.WriteByte(digits[c>>4])
|
||||
buf.WriteByte(digits[c&0xF])
|
||||
}
|
||||
// Handle padding to the right.
|
||||
if f.WidthPresent && f.Width > width && f.Minus {
|
||||
f.writePadding(f.Width - width)
|
||||
}
|
||||
}
|
||||
|
||||
// fmt_sx formats a string as a hexadecimal encoding of its bytes.
|
||||
func (f *formatInfo) fmt_sx(s, digits string) {
|
||||
f.fmt_sbx(s, nil, digits)
|
||||
}
|
||||
|
||||
// fmt_bx formats a byte slice as a hexadecimal encoding of its bytes.
|
||||
func (f *formatInfo) fmt_bx(b []byte, digits string) {
|
||||
f.fmt_sbx("", b, digits)
|
||||
}
|
||||
|
||||
// fmt_q formats a string as a double-quoted, escaped Go string constant.
|
||||
// If f.sharp is set a raw (backquoted) string may be returned instead
|
||||
// if the string does not contain any control characters other than tab.
|
||||
func (f *formatInfo) fmt_q(s string) {
|
||||
s = f.truncate(s)
|
||||
if f.Sharp && strconv.CanBackquote(s) {
|
||||
f.padString("`" + s + "`")
|
||||
return
|
||||
}
|
||||
buf := f.intbuf[:0]
|
||||
if f.Plus {
|
||||
f.pad(strconv.AppendQuoteToASCII(buf, s))
|
||||
} else {
|
||||
f.pad(strconv.AppendQuote(buf, s))
|
||||
}
|
||||
}
|
||||
|
||||
// fmt_c formats an integer as a Unicode character.
|
||||
// If the character is not valid Unicode, it will print '\ufffd'.
|
||||
func (f *formatInfo) fmt_c(c uint64) {
|
||||
r := rune(c)
|
||||
if c > utf8.MaxRune {
|
||||
r = utf8.RuneError
|
||||
}
|
||||
buf := f.intbuf[:0]
|
||||
w := utf8.EncodeRune(buf[:utf8.UTFMax], r)
|
||||
f.pad(buf[:w])
|
||||
}
|
||||
|
||||
// fmt_qc formats an integer as a single-quoted, escaped Go character constant.
|
||||
// If the character is not valid Unicode, it will print '\ufffd'.
|
||||
func (f *formatInfo) fmt_qc(c uint64) {
|
||||
r := rune(c)
|
||||
if c > utf8.MaxRune {
|
||||
r = utf8.RuneError
|
||||
}
|
||||
buf := f.intbuf[:0]
|
||||
if f.Plus {
|
||||
f.pad(strconv.AppendQuoteRuneToASCII(buf, r))
|
||||
} else {
|
||||
f.pad(strconv.AppendQuoteRune(buf, r))
|
||||
}
|
||||
}
|
||||
|
||||
// fmt_float formats a float64. It assumes that verb is a valid format specifier
|
||||
// for strconv.AppendFloat and therefore fits into a byte.
|
||||
func (f *formatInfo) fmt_float(v float64, size int, verb rune, prec int) {
|
||||
// Explicit precision in format specifier overrules default precision.
|
||||
if f.PrecPresent {
|
||||
prec = f.Prec
|
||||
}
|
||||
// Format number, reserving space for leading + sign if needed.
|
||||
num := strconv.AppendFloat(f.intbuf[:1], v, byte(verb), prec, size)
|
||||
if num[1] == '-' || num[1] == '+' {
|
||||
num = num[1:]
|
||||
} else {
|
||||
num[0] = '+'
|
||||
}
|
||||
// f.space means to add a leading space instead of a "+" sign unless
|
||||
// the sign is explicitly asked for by f.plus.
|
||||
if f.Space && num[0] == '+' && !f.Plus {
|
||||
num[0] = ' '
|
||||
}
|
||||
// Special handling for infinities and NaN,
|
||||
// which don't look like a number so shouldn't be padded with zeros.
|
||||
if num[1] == 'I' || num[1] == 'N' {
|
||||
oldZero := f.Zero
|
||||
f.Zero = false
|
||||
// Remove sign before NaN if not asked for.
|
||||
if num[1] == 'N' && !f.Space && !f.Plus {
|
||||
num = num[1:]
|
||||
}
|
||||
f.pad(num)
|
||||
f.Zero = oldZero
|
||||
return
|
||||
}
|
||||
// The sharp flag forces printing a decimal point for non-binary formats
|
||||
// and retains trailing zeros, which we may need to restore.
|
||||
if f.Sharp && verb != 'b' {
|
||||
digits := 0
|
||||
switch verb {
|
||||
case 'v', 'g', 'G':
|
||||
digits = prec
|
||||
// If no precision is set explicitly use a precision of 6.
|
||||
if digits == -1 {
|
||||
digits = 6
|
||||
}
|
||||
}
|
||||
|
||||
// Buffer pre-allocated with enough room for
|
||||
// exponent notations of the form "e+123".
|
||||
var tailBuf [5]byte
|
||||
tail := tailBuf[:0]
|
||||
|
||||
hasDecimalPoint := false
|
||||
// Starting from i = 1 to skip sign at num[0].
|
||||
for i := 1; i < len(num); i++ {
|
||||
switch num[i] {
|
||||
case '.':
|
||||
hasDecimalPoint = true
|
||||
case 'e', 'E':
|
||||
tail = append(tail, num[i:]...)
|
||||
num = num[:i]
|
||||
default:
|
||||
digits--
|
||||
}
|
||||
}
|
||||
if !hasDecimalPoint {
|
||||
num = append(num, '.')
|
||||
}
|
||||
for digits > 0 {
|
||||
num = append(num, '0')
|
||||
digits--
|
||||
}
|
||||
num = append(num, tail...)
|
||||
}
|
||||
// We want a sign if asked for and if the sign is not positive.
|
||||
if f.Plus || num[0] != '+' {
|
||||
// If we're zero padding to the left we want the sign before the leading zeros.
|
||||
// Achieve this by writing the sign out and then padding the unsigned number.
|
||||
if f.Zero && f.WidthPresent && f.Width > len(num) {
|
||||
f.buf.WriteByte(num[0])
|
||||
f.writePadding(f.Width - len(num))
|
||||
f.buf.Write(num[1:])
|
||||
return
|
||||
}
|
||||
f.pad(num)
|
||||
return
|
||||
}
|
||||
// No sign to show and the number is positive; just print the unsigned number.
|
||||
f.pad(num[1:])
|
||||
}
|
@ -0,0 +1,193 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package message // import "golang.org/x/text/message"
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
|
||||
// Include features to facilitate generated catalogs.
|
||||
_ "golang.org/x/text/feature/plural"
|
||||
|
||||
"golang.org/x/text/internal/number"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/message/catalog"
|
||||
)
|
||||
|
||||
// A Printer implements language-specific formatted I/O analogous to the fmt
|
||||
// package.
|
||||
type Printer struct {
|
||||
// the language
|
||||
tag language.Tag
|
||||
|
||||
toDecimal number.Formatter
|
||||
toScientific number.Formatter
|
||||
|
||||
cat catalog.Catalog
|
||||
}
|
||||
|
||||
type options struct {
|
||||
cat catalog.Catalog
|
||||
// TODO:
|
||||
// - allow %s to print integers in written form (tables are likely too large
|
||||
// to enable this by default).
|
||||
// - list behavior
|
||||
//
|
||||
}
|
||||
|
||||
// An Option defines an option of a Printer.
|
||||
type Option func(o *options)
|
||||
|
||||
// Catalog defines the catalog to be used.
|
||||
func Catalog(c catalog.Catalog) Option {
|
||||
return func(o *options) { o.cat = c }
|
||||
}
|
||||
|
||||
// NewPrinter returns a Printer that formats messages tailored to language t.
|
||||
func NewPrinter(t language.Tag, opts ...Option) *Printer {
|
||||
options := &options{
|
||||
cat: DefaultCatalog,
|
||||
}
|
||||
for _, o := range opts {
|
||||
o(options)
|
||||
}
|
||||
p := &Printer{
|
||||
tag: t,
|
||||
cat: options.cat,
|
||||
}
|
||||
p.toDecimal.InitDecimal(t)
|
||||
p.toScientific.InitScientific(t)
|
||||
return p
|
||||
}
|
||||
|
||||
// Sprint is like fmt.Sprint, but using language-specific formatting.
|
||||
func (p *Printer) Sprint(a ...interface{}) string {
|
||||
pp := newPrinter(p)
|
||||
pp.doPrint(a)
|
||||
s := pp.String()
|
||||
pp.free()
|
||||
return s
|
||||
}
|
||||
|
||||
// Fprint is like fmt.Fprint, but using language-specific formatting.
|
||||
func (p *Printer) Fprint(w io.Writer, a ...interface{}) (n int, err error) {
|
||||
pp := newPrinter(p)
|
||||
pp.doPrint(a)
|
||||
n64, err := io.Copy(w, &pp.Buffer)
|
||||
pp.free()
|
||||
return int(n64), err
|
||||
}
|
||||
|
||||
// Print is like fmt.Print, but using language-specific formatting.
|
||||
func (p *Printer) Print(a ...interface{}) (n int, err error) {
|
||||
return p.Fprint(os.Stdout, a...)
|
||||
}
|
||||
|
||||
// Sprintln is like fmt.Sprintln, but using language-specific formatting.
|
||||
func (p *Printer) Sprintln(a ...interface{}) string {
|
||||
pp := newPrinter(p)
|
||||
pp.doPrintln(a)
|
||||
s := pp.String()
|
||||
pp.free()
|
||||
return s
|
||||
}
|
||||
|
||||
// Fprintln is like fmt.Fprintln, but using language-specific formatting.
|
||||
func (p *Printer) Fprintln(w io.Writer, a ...interface{}) (n int, err error) {
|
||||
pp := newPrinter(p)
|
||||
pp.doPrintln(a)
|
||||
n64, err := io.Copy(w, &pp.Buffer)
|
||||
pp.free()
|
||||
return int(n64), err
|
||||
}
|
||||
|
||||
// Println is like fmt.Println, but using language-specific formatting.
|
||||
func (p *Printer) Println(a ...interface{}) (n int, err error) {
|
||||
return p.Fprintln(os.Stdout, a...)
|
||||
}
|
||||
|
||||
// Sprintf is like fmt.Sprintf, but using language-specific formatting.
|
||||
func (p *Printer) Sprintf(key Reference, a ...interface{}) string {
|
||||
pp := newPrinter(p)
|
||||
lookupAndFormat(pp, key, a)
|
||||
s := pp.String()
|
||||
pp.free()
|
||||
return s
|
||||
}
|
||||
|
||||
// Fprintf is like fmt.Fprintf, but using language-specific formatting.
|
||||
func (p *Printer) Fprintf(w io.Writer, key Reference, a ...interface{}) (n int, err error) {
|
||||
pp := newPrinter(p)
|
||||
lookupAndFormat(pp, key, a)
|
||||
n, err = w.Write(pp.Bytes())
|
||||
pp.free()
|
||||
return n, err
|
||||
|
||||
}
|
||||
|
||||
// Printf is like fmt.Printf, but using language-specific formatting.
|
||||
func (p *Printer) Printf(key Reference, a ...interface{}) (n int, err error) {
|
||||
pp := newPrinter(p)
|
||||
lookupAndFormat(pp, key, a)
|
||||
n, err = os.Stdout.Write(pp.Bytes())
|
||||
pp.free()
|
||||
return n, err
|
||||
}
|
||||
|
||||
func lookupAndFormat(p *printer, r Reference, a []interface{}) {
|
||||
p.fmt.Reset(a)
|
||||
var id, msg string
|
||||
switch v := r.(type) {
|
||||
case string:
|
||||
id, msg = v, v
|
||||
case key:
|
||||
id, msg = v.id, v.fallback
|
||||
default:
|
||||
panic("key argument is not a Reference")
|
||||
}
|
||||
|
||||
if p.catContext.Execute(id) == catalog.ErrNotFound {
|
||||
if p.catContext.Execute(msg) == catalog.ErrNotFound {
|
||||
p.Render(msg)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type rawPrinter struct {
|
||||
p *printer
|
||||
}
|
||||
|
||||
func (p rawPrinter) Render(msg string) { p.p.WriteString(msg) }
|
||||
func (p rawPrinter) Arg(i int) interface{} { return nil }
|
||||
|
||||
// Arg implements catmsg.Renderer.
|
||||
func (p *printer) Arg(i int) interface{} { // TODO, also return "ok" bool
|
||||
i--
|
||||
if uint(i) < uint(len(p.fmt.Args)) {
|
||||
return p.fmt.Args[i]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Render implements catmsg.Renderer.
|
||||
func (p *printer) Render(msg string) {
|
||||
p.doPrintf(msg)
|
||||
}
|
||||
|
||||
// Reference identifies a message: either a plain string or a value returned
// by Key.
type Reference interface {
	// TODO: also allow []string
}

// key is the Reference created by Key. id is used for message lookup and
// fallback is the text used when no match is found.
type key struct {
	id, fallback string
}

// Key creates a message Reference for a message where the given id is used
// for message lookup and the fallback is returned when no matches are found.
func Key(id string, fallback string) Reference {
	return key{id: id, fallback: fallback}
}
|
@ -0,0 +1,984 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package message
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt" // TODO: consider copying interfaces from package fmt to avoid dependency.
|
||||
"math"
|
||||
"reflect"
|
||||
"sync"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal/format"
|
||||
"golang.org/x/text/internal/number"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/message/catalog"
|
||||
)
|
||||
|
||||
// Fixed output fragments, declared as strings so they can be passed to
// buffer.WriteString, which has less overhead than buffer.Write with byte
// arrays.
const (
	commaSpaceString  = ", "
	nilAngleString    = "<nil>"
	nilParenString    = "(nil)"
	nilString         = "nil"
	mapString         = "map["
	percentBangString = "%!"
	missingString     = "(MISSING)"
	badIndexString    = "(BADINDEX)"
	panicString       = "(PANIC="
	extraString       = "%!(EXTRA "
	badWidthString    = "%!(BADWIDTH)"
	badPrecString     = "%!(BADPREC)"
	noVerbString      = "%!(NOVERB)"

	invReflectString = "<invalid reflect.Value>"
)
|
||||
|
||||
var printerPool = sync.Pool{
|
||||
New: func() interface{} { return new(printer) },
|
||||
}
|
||||
|
||||
// newPrinter allocates a new printer struct or grabs a cached one.
|
||||
func newPrinter(pp *Printer) *printer {
|
||||
p := printerPool.Get().(*printer)
|
||||
p.Printer = *pp
|
||||
// TODO: cache most of the following call.
|
||||
p.catContext = pp.cat.Context(pp.tag, p)
|
||||
|
||||
p.panicking = false
|
||||
p.erroring = false
|
||||
p.fmt.init(&p.Buffer)
|
||||
return p
|
||||
}
|
||||
|
||||
// free saves used printer structs in printerFree; avoids an allocation per invocation.
|
||||
func (p *printer) free() {
|
||||
p.Buffer.Reset()
|
||||
p.arg = nil
|
||||
p.value = reflect.Value{}
|
||||
printerPool.Put(p)
|
||||
}
|
||||
|
||||
// printer is used to store a printer's state.
|
||||
// It implements "golang.org/x/text/internal/format".State.
|
||||
type printer struct {
|
||||
Printer
|
||||
|
||||
// the context for looking up message translations
|
||||
catContext *catalog.Context
|
||||
|
||||
// buffer for accumulating output.
|
||||
bytes.Buffer
|
||||
|
||||
// arg holds the current item, as an interface{}.
|
||||
arg interface{}
|
||||
// value is used instead of arg for reflect values.
|
||||
value reflect.Value
|
||||
|
||||
// fmt is used to format basic items such as integers or strings.
|
||||
fmt formatInfo
|
||||
|
||||
// panicking is set by catchPanic to avoid infinite panic, recover, panic, ... recursion.
|
||||
panicking bool
|
||||
// erroring is set when printing an error string to guard against calling handleMethods.
|
||||
erroring bool
|
||||
}
|
||||
|
||||
// Language implements "golang.org/x/text/internal/format".State.
|
||||
func (p *printer) Language() language.Tag { return p.tag }
|
||||
|
||||
// Width returns the parsed width and whether a width was present.
func (p *printer) Width() (wid int, ok bool) {
	wid, ok = p.fmt.Width, p.fmt.WidthPresent
	return wid, ok
}
|
||||
|
||||
// Precision returns the parsed precision and whether a precision was
// present.
func (p *printer) Precision() (prec int, ok bool) {
	prec, ok = p.fmt.Prec, p.fmt.PrecPresent
	return prec, ok
}
|
||||
|
||||
func (p *printer) Flag(b int) bool {
|
||||
switch b {
|
||||
case '-':
|
||||
return p.fmt.Minus
|
||||
case '+':
|
||||
return p.fmt.Plus || p.fmt.PlusV
|
||||
case '#':
|
||||
return p.fmt.Sharp || p.fmt.SharpV
|
||||
case ' ':
|
||||
return p.fmt.Space
|
||||
case '0':
|
||||
return p.fmt.Zero
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// getField returns the i'th field of the struct value v. If that field is a
// non-nil interface, the value stored in the interface is returned rather
// than the interface itself.
func getField(v reflect.Value, i int) reflect.Value {
	field := v.Field(i)
	if field.Kind() != reflect.Interface || field.IsNil() {
		return field
	}
	return field.Elem()
}
|
||||
|
||||
func (p *printer) unknownType(v reflect.Value) {
|
||||
if !v.IsValid() {
|
||||
p.WriteString(nilAngleString)
|
||||
return
|
||||
}
|
||||
p.WriteByte('?')
|
||||
p.WriteString(v.Type().String())
|
||||
p.WriteByte('?')
|
||||
}
|
||||
|
||||
func (p *printer) badVerb(verb rune) {
|
||||
p.erroring = true
|
||||
p.WriteString(percentBangString)
|
||||
p.WriteRune(verb)
|
||||
p.WriteByte('(')
|
||||
switch {
|
||||
case p.arg != nil:
|
||||
p.WriteString(reflect.TypeOf(p.arg).String())
|
||||
p.WriteByte('=')
|
||||
p.printArg(p.arg, 'v')
|
||||
case p.value.IsValid():
|
||||
p.WriteString(p.value.Type().String())
|
||||
p.WriteByte('=')
|
||||
p.printValue(p.value, 'v', 0)
|
||||
default:
|
||||
p.WriteString(nilAngleString)
|
||||
}
|
||||
p.WriteByte(')')
|
||||
p.erroring = false
|
||||
}
|
||||
|
||||
func (p *printer) fmtBool(v bool, verb rune) {
|
||||
switch verb {
|
||||
case 't', 'v':
|
||||
p.fmt.fmt_boolean(v)
|
||||
default:
|
||||
p.badVerb(verb)
|
||||
}
|
||||
}
|
||||
|
||||
// fmt0x64 formats a uint64 in hexadecimal and prefixes it with 0x or
|
||||
// not, as requested, by temporarily setting the sharp flag.
|
||||
func (p *printer) fmt0x64(v uint64, leading0x bool) {
|
||||
sharp := p.fmt.Sharp
|
||||
p.fmt.Sharp = leading0x
|
||||
p.fmt.fmt_integer(v, 16, unsigned, ldigits)
|
||||
p.fmt.Sharp = sharp
|
||||
}
|
||||
|
||||
// fmtInteger formats a signed or unsigned integer.
|
||||
func (p *printer) fmtInteger(v uint64, isSigned bool, verb rune) {
|
||||
switch verb {
|
||||
case 'v':
|
||||
if p.fmt.SharpV && !isSigned {
|
||||
p.fmt0x64(v, true)
|
||||
return
|
||||
}
|
||||
fallthrough
|
||||
case 'd':
|
||||
if p.fmt.Sharp || p.fmt.SharpV {
|
||||
p.fmt.fmt_integer(v, 10, isSigned, ldigits)
|
||||
} else {
|
||||
p.fmtDecimalInt(v, isSigned)
|
||||
}
|
||||
case 'b':
|
||||
p.fmt.fmt_integer(v, 2, isSigned, ldigits)
|
||||
case 'o':
|
||||
p.fmt.fmt_integer(v, 8, isSigned, ldigits)
|
||||
case 'x':
|
||||
p.fmt.fmt_integer(v, 16, isSigned, ldigits)
|
||||
case 'X':
|
||||
p.fmt.fmt_integer(v, 16, isSigned, udigits)
|
||||
case 'c':
|
||||
p.fmt.fmt_c(v)
|
||||
case 'q':
|
||||
if v <= utf8.MaxRune {
|
||||
p.fmt.fmt_qc(v)
|
||||
} else {
|
||||
p.badVerb(verb)
|
||||
}
|
||||
case 'U':
|
||||
p.fmt.fmt_unicode(v)
|
||||
default:
|
||||
p.badVerb(verb)
|
||||
}
|
||||
}
|
||||
|
||||
// fmtFloat formats a float. The default precision for each verb
|
||||
// is specified as last argument in the call to fmt_float.
|
||||
func (p *printer) fmtFloat(v float64, size int, verb rune) {
|
||||
switch verb {
|
||||
case 'b':
|
||||
p.fmt.fmt_float(v, size, verb, -1)
|
||||
case 'v':
|
||||
verb = 'g'
|
||||
fallthrough
|
||||
case 'g', 'G':
|
||||
if p.fmt.Sharp || p.fmt.SharpV {
|
||||
p.fmt.fmt_float(v, size, verb, -1)
|
||||
} else {
|
||||
p.fmtVariableFloat(v, size)
|
||||
}
|
||||
case 'e', 'E':
|
||||
if p.fmt.Sharp || p.fmt.SharpV {
|
||||
p.fmt.fmt_float(v, size, verb, 6)
|
||||
} else {
|
||||
p.fmtScientific(v, size, 6)
|
||||
}
|
||||
case 'f', 'F':
|
||||
if p.fmt.Sharp || p.fmt.SharpV {
|
||||
p.fmt.fmt_float(v, size, verb, 6)
|
||||
} else {
|
||||
p.fmtDecimalFloat(v, size, 6)
|
||||
}
|
||||
default:
|
||||
p.badVerb(verb)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *printer) setFlags(f *number.Formatter) {
|
||||
f.Flags &^= number.ElideSign
|
||||
if p.fmt.Plus || p.fmt.Space {
|
||||
f.Flags |= number.AlwaysSign
|
||||
if !p.fmt.Plus {
|
||||
f.Flags |= number.ElideSign
|
||||
}
|
||||
} else {
|
||||
f.Flags &^= number.AlwaysSign
|
||||
}
|
||||
}
|
||||
|
||||
func (p *printer) updatePadding(f *number.Formatter) {
|
||||
f.Flags &^= number.PadMask
|
||||
if p.fmt.Minus {
|
||||
f.Flags |= number.PadAfterSuffix
|
||||
} else {
|
||||
f.Flags |= number.PadBeforePrefix
|
||||
}
|
||||
f.PadRune = ' '
|
||||
f.FormatWidth = uint16(p.fmt.Width)
|
||||
}
|
||||
|
||||
func (p *printer) initDecimal(minFrac, maxFrac int) {
|
||||
f := &p.toDecimal
|
||||
f.MinIntegerDigits = 1
|
||||
f.MaxIntegerDigits = 0
|
||||
f.MinFractionDigits = uint8(minFrac)
|
||||
f.MaxFractionDigits = int16(maxFrac)
|
||||
p.setFlags(f)
|
||||
f.PadRune = 0
|
||||
if p.fmt.WidthPresent {
|
||||
if p.fmt.Zero {
|
||||
wid := p.fmt.Width
|
||||
// Use significant integers for this.
|
||||
// TODO: this is not the same as width, but so be it.
|
||||
if f.MinFractionDigits > 0 {
|
||||
wid -= 1 + int(f.MinFractionDigits)
|
||||
}
|
||||
if p.fmt.Plus || p.fmt.Space {
|
||||
wid--
|
||||
}
|
||||
if wid > 0 && wid > int(f.MinIntegerDigits) {
|
||||
f.MinIntegerDigits = uint8(wid)
|
||||
}
|
||||
}
|
||||
p.updatePadding(f)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *printer) initScientific(minFrac, maxFrac int) {
|
||||
f := &p.toScientific
|
||||
if maxFrac < 0 {
|
||||
f.SetPrecision(maxFrac)
|
||||
} else {
|
||||
f.SetPrecision(maxFrac + 1)
|
||||
f.MinFractionDigits = uint8(minFrac)
|
||||
f.MaxFractionDigits = int16(maxFrac)
|
||||
}
|
||||
f.MinExponentDigits = 2
|
||||
p.setFlags(f)
|
||||
f.PadRune = 0
|
||||
if p.fmt.WidthPresent {
|
||||
f.Flags &^= number.PadMask
|
||||
if p.fmt.Zero {
|
||||
f.PadRune = f.Digit(0)
|
||||
f.Flags |= number.PadAfterPrefix
|
||||
} else {
|
||||
f.PadRune = ' '
|
||||
f.Flags |= number.PadBeforePrefix
|
||||
}
|
||||
p.updatePadding(f)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *printer) fmtDecimalInt(v uint64, isSigned bool) {
|
||||
var d number.Decimal
|
||||
|
||||
f := &p.toDecimal
|
||||
if p.fmt.PrecPresent {
|
||||
p.setFlags(f)
|
||||
f.MinIntegerDigits = uint8(p.fmt.Prec)
|
||||
f.MaxIntegerDigits = 0
|
||||
f.MinFractionDigits = 0
|
||||
f.MaxFractionDigits = 0
|
||||
if p.fmt.WidthPresent {
|
||||
p.updatePadding(f)
|
||||
}
|
||||
} else {
|
||||
p.initDecimal(0, 0)
|
||||
}
|
||||
d.ConvertInt(p.toDecimal.RoundingContext, isSigned, v)
|
||||
|
||||
out := p.toDecimal.Format([]byte(nil), &d)
|
||||
p.Buffer.Write(out)
|
||||
}
|
||||
|
||||
func (p *printer) fmtDecimalFloat(v float64, size, prec int) {
|
||||
var d number.Decimal
|
||||
if p.fmt.PrecPresent {
|
||||
prec = p.fmt.Prec
|
||||
}
|
||||
p.initDecimal(prec, prec)
|
||||
d.ConvertFloat(p.toDecimal.RoundingContext, v, size)
|
||||
|
||||
out := p.toDecimal.Format([]byte(nil), &d)
|
||||
p.Buffer.Write(out)
|
||||
}
|
||||
|
||||
func (p *printer) fmtVariableFloat(v float64, size int) {
|
||||
prec := -1
|
||||
if p.fmt.PrecPresent {
|
||||
prec = p.fmt.Prec
|
||||
}
|
||||
var d number.Decimal
|
||||
p.initScientific(0, prec)
|
||||
d.ConvertFloat(p.toScientific.RoundingContext, v, size)
|
||||
|
||||
// Copy logic of 'g' formatting from strconv. It is simplified a bit as
|
||||
// we don't have to mind having prec > len(d.Digits).
|
||||
shortest := prec < 0
|
||||
ePrec := prec
|
||||
if shortest {
|
||||
prec = len(d.Digits)
|
||||
ePrec = 6
|
||||
} else if prec == 0 {
|
||||
prec = 1
|
||||
ePrec = 1
|
||||
}
|
||||
exp := int(d.Exp) - 1
|
||||
if exp < -4 || exp >= ePrec {
|
||||
p.initScientific(0, prec)
|
||||
|
||||
out := p.toScientific.Format([]byte(nil), &d)
|
||||
p.Buffer.Write(out)
|
||||
} else {
|
||||
if prec > int(d.Exp) {
|
||||
prec = len(d.Digits)
|
||||
}
|
||||
if prec -= int(d.Exp); prec < 0 {
|
||||
prec = 0
|
||||
}
|
||||
p.initDecimal(0, prec)
|
||||
|
||||
out := p.toDecimal.Format([]byte(nil), &d)
|
||||
p.Buffer.Write(out)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *printer) fmtScientific(v float64, size, prec int) {
|
||||
var d number.Decimal
|
||||
if p.fmt.PrecPresent {
|
||||
prec = p.fmt.Prec
|
||||
}
|
||||
p.initScientific(prec, prec)
|
||||
rc := p.toScientific.RoundingContext
|
||||
d.ConvertFloat(rc, v, size)
|
||||
|
||||
out := p.toScientific.Format([]byte(nil), &d)
|
||||
p.Buffer.Write(out)
|
||||
|
||||
}
|
||||
|
||||
// fmtComplex formats a complex number v with
|
||||
// r = real(v) and j = imag(v) as (r+ji) using
|
||||
// fmtFloat for r and j formatting.
|
||||
func (p *printer) fmtComplex(v complex128, size int, verb rune) {
|
||||
// Make sure any unsupported verbs are found before the
|
||||
// calls to fmtFloat to not generate an incorrect error string.
|
||||
switch verb {
|
||||
case 'v', 'b', 'g', 'G', 'f', 'F', 'e', 'E':
|
||||
p.WriteByte('(')
|
||||
p.fmtFloat(real(v), size/2, verb)
|
||||
// Imaginary part always has a sign.
|
||||
if math.IsNaN(imag(v)) {
|
||||
// By CLDR's rules, NaNs do not use patterns or signs. As this code
|
||||
// relies on AlwaysSign working for imaginary parts, we need to
|
||||
// manually handle NaNs.
|
||||
f := &p.toScientific
|
||||
p.setFlags(f)
|
||||
p.updatePadding(f)
|
||||
p.setFlags(f)
|
||||
nan := f.Symbol(number.SymNan)
|
||||
extra := 0
|
||||
if w, ok := p.Width(); ok {
|
||||
extra = w - utf8.RuneCountInString(nan) - 1
|
||||
}
|
||||
if f.Flags&number.PadAfterNumber == 0 {
|
||||
for ; extra > 0; extra-- {
|
||||
p.WriteRune(f.PadRune)
|
||||
}
|
||||
}
|
||||
p.WriteString(f.Symbol(number.SymPlusSign))
|
||||
p.WriteString(nan)
|
||||
for ; extra > 0; extra-- {
|
||||
p.WriteRune(f.PadRune)
|
||||
}
|
||||
p.WriteString("i)")
|
||||
return
|
||||
}
|
||||
oldPlus := p.fmt.Plus
|
||||
p.fmt.Plus = true
|
||||
p.fmtFloat(imag(v), size/2, verb)
|
||||
p.WriteString("i)") // TODO: use symbol?
|
||||
p.fmt.Plus = oldPlus
|
||||
default:
|
||||
p.badVerb(verb)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *printer) fmtString(v string, verb rune) {
|
||||
switch verb {
|
||||
case 'v':
|
||||
if p.fmt.SharpV {
|
||||
p.fmt.fmt_q(v)
|
||||
} else {
|
||||
p.fmt.fmt_s(v)
|
||||
}
|
||||
case 's':
|
||||
p.fmt.fmt_s(v)
|
||||
case 'x':
|
||||
p.fmt.fmt_sx(v, ldigits)
|
||||
case 'X':
|
||||
p.fmt.fmt_sx(v, udigits)
|
||||
case 'q':
|
||||
p.fmt.fmt_q(v)
|
||||
case 'm':
|
||||
ctx := p.cat.Context(p.tag, rawPrinter{p})
|
||||
if ctx.Execute(v) == catalog.ErrNotFound {
|
||||
p.WriteString(v)
|
||||
}
|
||||
default:
|
||||
p.badVerb(verb)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *printer) fmtBytes(v []byte, verb rune, typeString string) {
|
||||
switch verb {
|
||||
case 'v', 'd':
|
||||
if p.fmt.SharpV {
|
||||
p.WriteString(typeString)
|
||||
if v == nil {
|
||||
p.WriteString(nilParenString)
|
||||
return
|
||||
}
|
||||
p.WriteByte('{')
|
||||
for i, c := range v {
|
||||
if i > 0 {
|
||||
p.WriteString(commaSpaceString)
|
||||
}
|
||||
p.fmt0x64(uint64(c), true)
|
||||
}
|
||||
p.WriteByte('}')
|
||||
} else {
|
||||
p.WriteByte('[')
|
||||
for i, c := range v {
|
||||
if i > 0 {
|
||||
p.WriteByte(' ')
|
||||
}
|
||||
p.fmt.fmt_integer(uint64(c), 10, unsigned, ldigits)
|
||||
}
|
||||
p.WriteByte(']')
|
||||
}
|
||||
case 's':
|
||||
p.fmt.fmt_s(string(v))
|
||||
case 'x':
|
||||
p.fmt.fmt_bx(v, ldigits)
|
||||
case 'X':
|
||||
p.fmt.fmt_bx(v, udigits)
|
||||
case 'q':
|
||||
p.fmt.fmt_q(string(v))
|
||||
default:
|
||||
p.printValue(reflect.ValueOf(v), verb, 0)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *printer) fmtPointer(value reflect.Value, verb rune) {
|
||||
var u uintptr
|
||||
switch value.Kind() {
|
||||
case reflect.Chan, reflect.Func, reflect.Map, reflect.Ptr, reflect.Slice, reflect.UnsafePointer:
|
||||
u = value.Pointer()
|
||||
default:
|
||||
p.badVerb(verb)
|
||||
return
|
||||
}
|
||||
|
||||
switch verb {
|
||||
case 'v':
|
||||
if p.fmt.SharpV {
|
||||
p.WriteByte('(')
|
||||
p.WriteString(value.Type().String())
|
||||
p.WriteString(")(")
|
||||
if u == 0 {
|
||||
p.WriteString(nilString)
|
||||
} else {
|
||||
p.fmt0x64(uint64(u), true)
|
||||
}
|
||||
p.WriteByte(')')
|
||||
} else {
|
||||
if u == 0 {
|
||||
p.fmt.padString(nilAngleString)
|
||||
} else {
|
||||
p.fmt0x64(uint64(u), !p.fmt.Sharp)
|
||||
}
|
||||
}
|
||||
case 'p':
|
||||
p.fmt0x64(uint64(u), !p.fmt.Sharp)
|
||||
case 'b', 'o', 'd', 'x', 'X':
|
||||
if verb == 'd' {
|
||||
p.fmt.Sharp = true // Print as standard go. TODO: does this make sense?
|
||||
}
|
||||
p.fmtInteger(uint64(u), unsigned, verb)
|
||||
default:
|
||||
p.badVerb(verb)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *printer) catchPanic(arg interface{}, verb rune) {
|
||||
if err := recover(); err != nil {
|
||||
// If it's a nil pointer, just say "<nil>". The likeliest causes are a
|
||||
// Stringer that fails to guard against nil or a nil pointer for a
|
||||
// value receiver, and in either case, "<nil>" is a nice result.
|
||||
if v := reflect.ValueOf(arg); v.Kind() == reflect.Ptr && v.IsNil() {
|
||||
p.WriteString(nilAngleString)
|
||||
return
|
||||
}
|
||||
// Otherwise print a concise panic message. Most of the time the panic
|
||||
// value will print itself nicely.
|
||||
if p.panicking {
|
||||
// Nested panics; the recursion in printArg cannot succeed.
|
||||
panic(err)
|
||||
}
|
||||
|
||||
oldFlags := p.fmt.Parser
|
||||
// For this output we want default behavior.
|
||||
p.fmt.ClearFlags()
|
||||
|
||||
p.WriteString(percentBangString)
|
||||
p.WriteRune(verb)
|
||||
p.WriteString(panicString)
|
||||
p.panicking = true
|
||||
p.printArg(err, 'v')
|
||||
p.panicking = false
|
||||
p.WriteByte(')')
|
||||
|
||||
p.fmt.Parser = oldFlags
|
||||
}
|
||||
}
|
||||
|
||||
func (p *printer) handleMethods(verb rune) (handled bool) {
|
||||
if p.erroring {
|
||||
return
|
||||
}
|
||||
// Is it a Formatter?
|
||||
if formatter, ok := p.arg.(format.Formatter); ok {
|
||||
handled = true
|
||||
defer p.catchPanic(p.arg, verb)
|
||||
formatter.Format(p, verb)
|
||||
return
|
||||
}
|
||||
if formatter, ok := p.arg.(fmt.Formatter); ok {
|
||||
handled = true
|
||||
defer p.catchPanic(p.arg, verb)
|
||||
formatter.Format(p, verb)
|
||||
return
|
||||
}
|
||||
|
||||
// If we're doing Go syntax and the argument knows how to supply it, take care of it now.
|
||||
if p.fmt.SharpV {
|
||||
if stringer, ok := p.arg.(fmt.GoStringer); ok {
|
||||
handled = true
|
||||
defer p.catchPanic(p.arg, verb)
|
||||
// Print the result of GoString unadorned.
|
||||
p.fmt.fmt_s(stringer.GoString())
|
||||
return
|
||||
}
|
||||
} else {
|
||||
// If a string is acceptable according to the format, see if
|
||||
// the value satisfies one of the string-valued interfaces.
|
||||
// Println etc. set verb to %v, which is "stringable".
|
||||
switch verb {
|
||||
case 'v', 's', 'x', 'X', 'q':
|
||||
// Is it an error or Stringer?
|
||||
// The duplication in the bodies is necessary:
|
||||
// setting handled and deferring catchPanic
|
||||
// must happen before calling the method.
|
||||
switch v := p.arg.(type) {
|
||||
case error:
|
||||
handled = true
|
||||
defer p.catchPanic(p.arg, verb)
|
||||
p.fmtString(v.Error(), verb)
|
||||
return
|
||||
|
||||
case fmt.Stringer:
|
||||
handled = true
|
||||
defer p.catchPanic(p.arg, verb)
|
||||
p.fmtString(v.String(), verb)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (p *printer) printArg(arg interface{}, verb rune) {
|
||||
p.arg = arg
|
||||
p.value = reflect.Value{}
|
||||
|
||||
if arg == nil {
|
||||
switch verb {
|
||||
case 'T', 'v':
|
||||
p.fmt.padString(nilAngleString)
|
||||
default:
|
||||
p.badVerb(verb)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Special processing considerations.
|
||||
// %T (the value's type) and %p (its address) are special; we always do them first.
|
||||
switch verb {
|
||||
case 'T':
|
||||
p.fmt.fmt_s(reflect.TypeOf(arg).String())
|
||||
return
|
||||
case 'p':
|
||||
p.fmtPointer(reflect.ValueOf(arg), 'p')
|
||||
return
|
||||
}
|
||||
|
||||
// Some types can be done without reflection.
|
||||
switch f := arg.(type) {
|
||||
case bool:
|
||||
p.fmtBool(f, verb)
|
||||
case float32:
|
||||
p.fmtFloat(float64(f), 32, verb)
|
||||
case float64:
|
||||
p.fmtFloat(f, 64, verb)
|
||||
case complex64:
|
||||
p.fmtComplex(complex128(f), 64, verb)
|
||||
case complex128:
|
||||
p.fmtComplex(f, 128, verb)
|
||||
case int:
|
||||
p.fmtInteger(uint64(f), signed, verb)
|
||||
case int8:
|
||||
p.fmtInteger(uint64(f), signed, verb)
|
||||
case int16:
|
||||
p.fmtInteger(uint64(f), signed, verb)
|
||||
case int32:
|
||||
p.fmtInteger(uint64(f), signed, verb)
|
||||
case int64:
|
||||
p.fmtInteger(uint64(f), signed, verb)
|
||||
case uint:
|
||||
p.fmtInteger(uint64(f), unsigned, verb)
|
||||
case uint8:
|
||||
p.fmtInteger(uint64(f), unsigned, verb)
|
||||
case uint16:
|
||||
p.fmtInteger(uint64(f), unsigned, verb)
|
||||
case uint32:
|
||||
p.fmtInteger(uint64(f), unsigned, verb)
|
||||
case uint64:
|
||||
p.fmtInteger(f, unsigned, verb)
|
||||
case uintptr:
|
||||
p.fmtInteger(uint64(f), unsigned, verb)
|
||||
case string:
|
||||
p.fmtString(f, verb)
|
||||
case []byte:
|
||||
p.fmtBytes(f, verb, "[]byte")
|
||||
case reflect.Value:
|
||||
// Handle extractable values with special methods
|
||||
// since printValue does not handle them at depth 0.
|
||||
if f.IsValid() && f.CanInterface() {
|
||||
p.arg = f.Interface()
|
||||
if p.handleMethods(verb) {
|
||||
return
|
||||
}
|
||||
}
|
||||
p.printValue(f, verb, 0)
|
||||
default:
|
||||
// If the type is not simple, it might have methods.
|
||||
if !p.handleMethods(verb) {
|
||||
// Need to use reflection, since the type had no
|
||||
// interface methods that could be used for formatting.
|
||||
p.printValue(reflect.ValueOf(f), verb, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// printValue is similar to printArg but starts with a reflect value, not an interface{} value.
|
||||
// It does not handle 'p' and 'T' verbs because these should have been already handled by printArg.
|
||||
func (p *printer) printValue(value reflect.Value, verb rune, depth int) {
|
||||
// Handle values with special methods if not already handled by printArg (depth == 0).
|
||||
if depth > 0 && value.IsValid() && value.CanInterface() {
|
||||
p.arg = value.Interface()
|
||||
if p.handleMethods(verb) {
|
||||
return
|
||||
}
|
||||
}
|
||||
p.arg = nil
|
||||
p.value = value
|
||||
|
||||
switch f := value; value.Kind() {
|
||||
case reflect.Invalid:
|
||||
if depth == 0 {
|
||||
p.WriteString(invReflectString)
|
||||
} else {
|
||||
switch verb {
|
||||
case 'v':
|
||||
p.WriteString(nilAngleString)
|
||||
default:
|
||||
p.badVerb(verb)
|
||||
}
|
||||
}
|
||||
case reflect.Bool:
|
||||
p.fmtBool(f.Bool(), verb)
|
||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
||||
p.fmtInteger(uint64(f.Int()), signed, verb)
|
||||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
|
||||
p.fmtInteger(f.Uint(), unsigned, verb)
|
||||
case reflect.Float32:
|
||||
p.fmtFloat(f.Float(), 32, verb)
|
||||
case reflect.Float64:
|
||||
p.fmtFloat(f.Float(), 64, verb)
|
||||
case reflect.Complex64:
|
||||
p.fmtComplex(f.Complex(), 64, verb)
|
||||
case reflect.Complex128:
|
||||
p.fmtComplex(f.Complex(), 128, verb)
|
||||
case reflect.String:
|
||||
p.fmtString(f.String(), verb)
|
||||
case reflect.Map:
|
||||
if p.fmt.SharpV {
|
||||
p.WriteString(f.Type().String())
|
||||
if f.IsNil() {
|
||||
p.WriteString(nilParenString)
|
||||
return
|
||||
}
|
||||
p.WriteByte('{')
|
||||
} else {
|
||||
p.WriteString(mapString)
|
||||
}
|
||||
keys := f.MapKeys()
|
||||
for i, key := range keys {
|
||||
if i > 0 {
|
||||
if p.fmt.SharpV {
|
||||
p.WriteString(commaSpaceString)
|
||||
} else {
|
||||
p.WriteByte(' ')
|
||||
}
|
||||
}
|
||||
p.printValue(key, verb, depth+1)
|
||||
p.WriteByte(':')
|
||||
p.printValue(f.MapIndex(key), verb, depth+1)
|
||||
}
|
||||
if p.fmt.SharpV {
|
||||
p.WriteByte('}')
|
||||
} else {
|
||||
p.WriteByte(']')
|
||||
}
|
||||
case reflect.Struct:
|
||||
if p.fmt.SharpV {
|
||||
p.WriteString(f.Type().String())
|
||||
}
|
||||
p.WriteByte('{')
|
||||
for i := 0; i < f.NumField(); i++ {
|
||||
if i > 0 {
|
||||
if p.fmt.SharpV {
|
||||
p.WriteString(commaSpaceString)
|
||||
} else {
|
||||
p.WriteByte(' ')
|
||||
}
|
||||
}
|
||||
if p.fmt.PlusV || p.fmt.SharpV {
|
||||
if name := f.Type().Field(i).Name; name != "" {
|
||||
p.WriteString(name)
|
||||
p.WriteByte(':')
|
||||
}
|
||||
}
|
||||
p.printValue(getField(f, i), verb, depth+1)
|
||||
}
|
||||
p.WriteByte('}')
|
||||
case reflect.Interface:
|
||||
value := f.Elem()
|
||||
if !value.IsValid() {
|
||||
if p.fmt.SharpV {
|
||||
p.WriteString(f.Type().String())
|
||||
p.WriteString(nilParenString)
|
||||
} else {
|
||||
p.WriteString(nilAngleString)
|
||||
}
|
||||
} else {
|
||||
p.printValue(value, verb, depth+1)
|
||||
}
|
||||
case reflect.Array, reflect.Slice:
|
||||
switch verb {
|
||||
case 's', 'q', 'x', 'X':
|
||||
// Handle byte and uint8 slices and arrays special for the above verbs.
|
||||
t := f.Type()
|
||||
if t.Elem().Kind() == reflect.Uint8 {
|
||||
var bytes []byte
|
||||
if f.Kind() == reflect.Slice {
|
||||
bytes = f.Bytes()
|
||||
} else if f.CanAddr() {
|
||||
bytes = f.Slice(0, f.Len()).Bytes()
|
||||
} else {
|
||||
// We have an array, but we cannot Slice() a non-addressable array,
|
||||
// so we build a slice by hand. This is a rare case but it would be nice
|
||||
// if reflection could help a little more.
|
||||
bytes = make([]byte, f.Len())
|
||||
for i := range bytes {
|
||||
bytes[i] = byte(f.Index(i).Uint())
|
||||
}
|
||||
}
|
||||
p.fmtBytes(bytes, verb, t.String())
|
||||
return
|
||||
}
|
||||
}
|
||||
if p.fmt.SharpV {
|
||||
p.WriteString(f.Type().String())
|
||||
if f.Kind() == reflect.Slice && f.IsNil() {
|
||||
p.WriteString(nilParenString)
|
||||
return
|
||||
}
|
||||
p.WriteByte('{')
|
||||
for i := 0; i < f.Len(); i++ {
|
||||
if i > 0 {
|
||||
p.WriteString(commaSpaceString)
|
||||
}
|
||||
p.printValue(f.Index(i), verb, depth+1)
|
||||
}
|
||||
p.WriteByte('}')
|
||||
} else {
|
||||
p.WriteByte('[')
|
||||
for i := 0; i < f.Len(); i++ {
|
||||
if i > 0 {
|
||||
p.WriteByte(' ')
|
||||
}
|
||||
p.printValue(f.Index(i), verb, depth+1)
|
||||
}
|
||||
p.WriteByte(']')
|
||||
}
|
||||
case reflect.Ptr:
|
||||
// pointer to array or slice or struct? ok at top level
|
||||
// but not embedded (avoid loops)
|
||||
if depth == 0 && f.Pointer() != 0 {
|
||||
switch a := f.Elem(); a.Kind() {
|
||||
case reflect.Array, reflect.Slice, reflect.Struct, reflect.Map:
|
||||
p.WriteByte('&')
|
||||
p.printValue(a, verb, depth+1)
|
||||
return
|
||||
}
|
||||
}
|
||||
fallthrough
|
||||
case reflect.Chan, reflect.Func, reflect.UnsafePointer:
|
||||
p.fmtPointer(f, verb)
|
||||
default:
|
||||
p.unknownType(f)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *printer) badArgNum(verb rune) {
|
||||
p.WriteString(percentBangString)
|
||||
p.WriteRune(verb)
|
||||
p.WriteString(badIndexString)
|
||||
}
|
||||
|
||||
func (p *printer) missingArg(verb rune) {
|
||||
p.WriteString(percentBangString)
|
||||
p.WriteRune(verb)
|
||||
p.WriteString(missingString)
|
||||
}
|
||||
|
||||
func (p *printer) doPrintf(fmt string) {
|
||||
for p.fmt.Parser.SetFormat(fmt); p.fmt.Scan(); {
|
||||
switch p.fmt.Status {
|
||||
case format.StatusText:
|
||||
p.WriteString(p.fmt.Text())
|
||||
case format.StatusSubstitution:
|
||||
p.printArg(p.Arg(p.fmt.ArgNum), p.fmt.Verb)
|
||||
case format.StatusBadWidthSubstitution:
|
||||
p.WriteString(badWidthString)
|
||||
p.printArg(p.Arg(p.fmt.ArgNum), p.fmt.Verb)
|
||||
case format.StatusBadPrecSubstitution:
|
||||
p.WriteString(badPrecString)
|
||||
p.printArg(p.Arg(p.fmt.ArgNum), p.fmt.Verb)
|
||||
case format.StatusNoVerb:
|
||||
p.WriteString(noVerbString)
|
||||
case format.StatusBadArgNum:
|
||||
p.badArgNum(p.fmt.Verb)
|
||||
case format.StatusMissingArg:
|
||||
p.missingArg(p.fmt.Verb)
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
// Check for extra arguments, but only if there was at least one ordered
|
||||
// argument. Note that this behavior is necessarily different from fmt:
|
||||
// different variants of messages may opt to drop some or all of the
|
||||
// arguments.
|
||||
if !p.fmt.Reordered && p.fmt.ArgNum < len(p.fmt.Args) && p.fmt.ArgNum != 0 {
|
||||
p.fmt.ClearFlags()
|
||||
p.WriteString(extraString)
|
||||
for i, arg := range p.fmt.Args[p.fmt.ArgNum:] {
|
||||
if i > 0 {
|
||||
p.WriteString(commaSpaceString)
|
||||
}
|
||||
if arg == nil {
|
||||
p.WriteString(nilAngleString)
|
||||
} else {
|
||||
p.WriteString(reflect.TypeOf(arg).String())
|
||||
p.WriteString("=")
|
||||
p.printArg(arg, 'v')
|
||||
}
|
||||
}
|
||||
p.WriteByte(')')
|
||||
}
|
||||
}
|
||||
|
||||
func (p *printer) doPrint(a []interface{}) {
|
||||
prevString := false
|
||||
for argNum, arg := range a {
|
||||
isString := arg != nil && reflect.TypeOf(arg).Kind() == reflect.String
|
||||
// Add a space between two non-string arguments.
|
||||
if argNum > 0 && !isString && !prevString {
|
||||
p.WriteByte(' ')
|
||||
}
|
||||
p.printArg(arg, 'v')
|
||||
prevString = isString
|
||||
}
|
||||
}
|
||||
|
||||
// doPrintln is like doPrint but always adds a space between arguments
|
||||
// and a newline after the last argument.
|
||||
func (p *printer) doPrintln(a []interface{}) {
|
||||
for argNum, arg := range a {
|
||||
if argNum > 0 {
|
||||
p.WriteByte(' ')
|
||||
}
|
||||
p.printArg(arg, 'v')
|
||||
}
|
||||
p.WriteByte('\n')
|
||||
}
|
Loading…
Reference in New Issue