feat(zfs-localpv): vendor packages for zfs-localpv

Signed-off-by: Pawan <pawan@mayadata.io>
This commit is contained in:
Pawan 2019-09-12 12:31:03 +05:30 committed by Kiran Mova
parent 8d81b8b9c6
commit 8a4ed4e7b8
2246 changed files with 950001 additions and 0 deletions

3
vendor/golang.org/x/text/AUTHORS generated vendored Normal file
View file

@ -0,0 +1,3 @@
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at http://tip.golang.org/AUTHORS.

3
vendor/golang.org/x/text/CONTRIBUTORS generated vendored Normal file
View file

@ -0,0 +1,3 @@
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at http://tip.golang.org/CONTRIBUTORS.

27
vendor/golang.org/x/text/LICENSE generated vendored Normal file
View file

@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

22
vendor/golang.org/x/text/PATENTS generated vendored Normal file
View file

@ -0,0 +1,22 @@
Additional IP Rights Grant (Patents)
"This implementation" means the copyrightable works distributed by
Google as part of the Go project.
Google hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable (except as stated in this section)
patent license to make, have made, use, offer to sell, sell, import,
transfer and otherwise run, modify and propagate the contents of this
implementation of Go, where such license applies only to those patent
claims, both currently owned or controlled by Google and acquired in
the future, licensable by Google that are necessarily infringed by this
implementation of Go. This grant does not include claims that would be
infringed only as a consequence of further modification of this
implementation. If you or your agent or exclusive licensee institute or
order or agree to the institution of patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging
that this implementation of Go or any code incorporated within this
implementation of Go constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any patent
rights granted to you under this License for this implementation of Go
shall terminate as of the date such litigation is filed.

702
vendor/golang.org/x/text/collate/build/builder.go generated vendored Normal file
View file

@ -0,0 +1,702 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build // import "golang.org/x/text/collate/build"
import (
"fmt"
"io"
"log"
"sort"
"strings"
"unicode/utf8"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/language"
"golang.org/x/text/unicode/norm"
)
// TODO: optimizations:
// - expandElem is currently 20K. By putting unique colElems in a separate
// table and having a byte array of indexes into this table, we can reduce
// the total size to about 7K. By also factoring out the length bytes, we
// can reduce this to about 6K.
// - trie valueBlocks are currently 100K. There are a lot of sparse blocks
// and many consecutive values with the same stride. This can be further
// compacted.
// - Compress secondary weights into 8 bits.
// - Some LDML specs specify a context element. Currently we simply concatenate
// those. Context can be implemented using the contraction trie. If Builder
// could analyze and detect when using a context makes sense, there is no
// need to expose this construct in the API.
// A Builder builds a root collation table. The user must specify the
// collation elements for each entry. A common use will be to base the weights
// on those specified in the allkeys* file as provided by the UCA or CLDR.
type Builder struct {
index *trieBuilder
root ordering
locale []*Tailoring
t *table
err error
built bool
minNonVar int // lowest primary recorded for a variable
varTop int // highest primary recorded for a non-variable
// indexes used for reusing expansions and contractions
expIndex map[string]int // positions of expansions keyed by their string representation
ctHandle map[string]ctHandle // contraction handles keyed by a concatenation of the suffixes
ctElem map[string]int // contraction elements keyed by their string representation
}
// A Tailoring builds a collation table based on another collation table.
// The table is defined by specifying tailorings to the underlying table.
// See https://unicode.org/reports/tr35/ for an overview of tailoring
// collation tables. The CLDR contains pre-defined tailorings for a variety
// of languages (See https://www.unicode.org/Public/cldr/<version>/core.zip.)
type Tailoring struct {
id string
builder *Builder
index *ordering
anchor *entry
before bool
}
// NewBuilder returns a new Builder.
func NewBuilder() *Builder {
return &Builder{
index: newTrieBuilder(),
root: makeRootOrdering(),
expIndex: make(map[string]int),
ctHandle: make(map[string]ctHandle),
ctElem: make(map[string]int),
}
}
// Tailoring returns a Tailoring for the given locale. One should
// have completed all calls to Add before calling Tailoring.
func (b *Builder) Tailoring(loc language.Tag) *Tailoring {
t := &Tailoring{
id: loc.String(),
builder: b,
index: b.root.clone(),
}
t.index.id = t.id
b.locale = append(b.locale, t)
return t
}
// Add adds an entry to the collation element table, mapping
// a slice of runes to a sequence of collation elements.
// A collation element is specified as list of weights: []int{primary, secondary, ...}.
// The entries are typically obtained from a collation element table
// as defined in https://www.unicode.org/reports/tr10/#Data_Table_Format.
// Note that the collation elements specified by colelems are only used
// as a guide. The actual weights generated by Builder may differ.
// The argument variables is a list of indices into colelems that should contain
// a value for each colelem that is a variable. (See the reference above.)
func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
str := string(runes)
elems := make([]rawCE, len(colelems))
for i, ce := range colelems {
if len(ce) == 0 {
break
}
elems[i] = makeRawCE(ce, 0)
if len(ce) == 1 {
elems[i].w[1] = defaultSecondary
}
if len(ce) <= 2 {
elems[i].w[2] = defaultTertiary
}
if len(ce) <= 3 {
elems[i].w[3] = ce[0]
}
}
for i, ce := range elems {
p := ce.w[0]
isvar := false
for _, j := range variables {
if i == j {
isvar = true
}
}
if isvar {
if p >= b.minNonVar && b.minNonVar > 0 {
return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", p, b.minNonVar)
}
if p > b.varTop {
b.varTop = p
}
} else if p > 1 { // 1 is a special primary value reserved for FFFE
if p <= b.varTop {
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", p, b.varTop)
}
if b.minNonVar == 0 || p < b.minNonVar {
b.minNonVar = p
}
}
}
elems, err := convertLargeWeights(elems)
if err != nil {
return err
}
cccs := []uint8{}
nfd := norm.NFD.String(str)
for i := range nfd {
cccs = append(cccs, norm.NFD.PropertiesString(nfd[i:]).CCC())
}
if len(cccs) < len(elems) {
if len(cccs) > 2 {
return fmt.Errorf("number of decomposed characters should be greater or equal to the number of collation elements for len(colelems) > 3 (%d < %d)", len(cccs), len(elems))
}
p := len(elems) - 1
for ; p > 0 && elems[p].w[0] == 0; p-- {
elems[p].ccc = cccs[len(cccs)-1]
}
for ; p >= 0; p-- {
elems[p].ccc = cccs[0]
}
} else {
for i := range elems {
elems[i].ccc = cccs[i]
}
}
// doNorm in collate.go assumes that the following conditions hold.
if len(elems) > 1 && len(cccs) > 1 && cccs[0] != 0 && cccs[0] != cccs[len(cccs)-1] {
return fmt.Errorf("incompatible CCC values for expansion %X (%d)", runes, cccs)
}
b.root.newEntry(str, elems)
return nil
}
func (t *Tailoring) setAnchor(anchor string) error {
anchor = norm.NFC.String(anchor)
a := t.index.find(anchor)
if a == nil {
a = t.index.newEntry(anchor, nil)
a.implicit = true
a.modified = true
for _, r := range []rune(anchor) {
e := t.index.find(string(r))
e.lock = true
}
}
t.anchor = a
return nil
}
// SetAnchor sets the point after which elements passed in subsequent calls to
// Insert will be inserted. It is equivalent to the reset directive in an LDML
// specification. See Insert for an example.
// SetAnchor supports the following logical reset positions:
// <first_tertiary_ignorable/>, <last_teriary_ignorable/>, <first_primary_ignorable/>,
// and <last_non_ignorable/>.
func (t *Tailoring) SetAnchor(anchor string) error {
if err := t.setAnchor(anchor); err != nil {
return err
}
t.before = false
return nil
}
// SetAnchorBefore is similar to SetAnchor, except that subsequent calls to
// Insert will insert entries before the anchor.
func (t *Tailoring) SetAnchorBefore(anchor string) error {
if err := t.setAnchor(anchor); err != nil {
return err
}
t.before = true
return nil
}
// Insert sets the ordering of str relative to the entry set by the previous
// call to SetAnchor or Insert. The argument extend corresponds
// to the extend elements as defined in LDML. A non-empty value for extend
// will cause the collation elements corresponding to extend to be appended
// to the collation elements generated for the entry added by Insert.
// This has the same net effect as sorting str after the string anchor+extend.
// See https://www.unicode.org/reports/tr10/#Tailoring_Example for details
// on parametric tailoring and https://unicode.org/reports/tr35/#Collation_Elements
// for full details on LDML.
//
// Examples: create a tailoring for Swedish, where "ä" is ordered after "z"
// at the primary sorting level:
// t := b.Tailoring("se")
// t.SetAnchor("z")
// t.Insert(colltab.Primary, "ä", "")
// Order "ü" after "ue" at the secondary sorting level:
// t.SetAnchor("ue")
// t.Insert(colltab.Secondary, "ü","")
// or
// t.SetAnchor("u")
// t.Insert(colltab.Secondary, "ü", "e")
// Order "q" afer "ab" at the secondary level and "Q" after "q"
// at the tertiary level:
// t.SetAnchor("ab")
// t.Insert(colltab.Secondary, "q", "")
// t.Insert(colltab.Tertiary, "Q", "")
// Order "b" before "a":
// t.SetAnchorBefore("a")
// t.Insert(colltab.Primary, "b", "")
// Order "0" after the last primary ignorable:
// t.SetAnchor("<last_primary_ignorable/>")
// t.Insert(colltab.Primary, "0", "")
func (t *Tailoring) Insert(level colltab.Level, str, extend string) error {
if t.anchor == nil {
return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str)
}
str = norm.NFC.String(str)
e := t.index.find(str)
if e == nil {
e = t.index.newEntry(str, nil)
} else if e.logical != noAnchor {
return fmt.Errorf("%s:Insert: cannot reinsert logical reset position %q", t.id, e.str)
}
if e.lock {
return fmt.Errorf("%s:Insert: cannot reinsert element %q", t.id, e.str)
}
a := t.anchor
// Find the first element after the anchor which differs at a level smaller or
// equal to the given level. Then insert at this position.
// See https://unicode.org/reports/tr35/#Collation_Elements, Section 5.14.5 for details.
e.before = t.before
if t.before {
t.before = false
if a.prev == nil {
a.insertBefore(e)
} else {
for a = a.prev; a.level > level; a = a.prev {
}
a.insertAfter(e)
}
e.level = level
} else {
for ; a.level > level; a = a.next {
}
e.level = a.level
if a != e {
a.insertAfter(e)
a.level = level
} else {
// We don't set a to prev itself. This has the effect of the entry
// getting new collation elements that are an increment of itself.
// This is intentional.
a.prev.level = level
}
}
e.extend = norm.NFD.String(extend)
e.exclude = false
e.modified = true
e.elems = nil
t.anchor = e
return nil
}
func (o *ordering) getWeight(e *entry) []rawCE {
if len(e.elems) == 0 && e.logical == noAnchor {
if e.implicit {
for _, r := range e.runes {
e.elems = append(e.elems, o.getWeight(o.find(string(r)))...)
}
} else if e.before {
count := [colltab.Identity + 1]int{}
a := e
for ; a.elems == nil && !a.implicit; a = a.next {
count[a.level]++
}
e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)}
for i := colltab.Primary; i < colltab.Quaternary; i++ {
if count[i] != 0 {
e.elems[0].w[i] -= count[i]
break
}
}
if e.prev != nil {
o.verifyWeights(e.prev, e, e.prev.level)
}
} else {
prev := e.prev
e.elems = nextWeight(prev.level, o.getWeight(prev))
o.verifyWeights(e, e.next, e.level)
}
}
return e.elems
}
func (o *ordering) addExtension(e *entry) {
if ex := o.find(e.extend); ex != nil {
e.elems = append(e.elems, ex.elems...)
} else {
for _, r := range []rune(e.extend) {
e.elems = append(e.elems, o.find(string(r)).elems...)
}
}
e.extend = ""
}
func (o *ordering) verifyWeights(a, b *entry, level colltab.Level) error {
if level == colltab.Identity || b == nil || b.elems == nil || a.elems == nil {
return nil
}
for i := colltab.Primary; i < level; i++ {
if a.elems[0].w[i] < b.elems[0].w[i] {
return nil
}
}
if a.elems[0].w[level] >= b.elems[0].w[level] {
err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems)
log.Println(err)
// TODO: return the error instead, or better, fix the conflicting entry by making room.
}
return nil
}
func (b *Builder) error(e error) {
if e != nil {
b.err = e
}
}
func (b *Builder) errorID(locale string, e error) {
if e != nil {
b.err = fmt.Errorf("%s:%v", locale, e)
}
}
// patchNorm ensures that NFC and NFD counterparts are consistent.
func (o *ordering) patchNorm() {
// Insert the NFD counterparts, if necessary.
for _, e := range o.ordered {
nfd := norm.NFD.String(e.str)
if nfd != e.str {
if e0 := o.find(nfd); e0 != nil && !e0.modified {
e0.elems = e.elems
} else if e.modified && !equalCEArrays(o.genColElems(nfd), e.elems) {
e := o.newEntry(nfd, e.elems)
e.modified = true
}
}
}
// Update unchanged composed forms if one of their parts changed.
for _, e := range o.ordered {
nfd := norm.NFD.String(e.str)
if e.modified || nfd == e.str {
continue
}
if e0 := o.find(nfd); e0 != nil {
e.elems = e0.elems
} else {
e.elems = o.genColElems(nfd)
if norm.NFD.LastBoundary([]byte(nfd)) == 0 {
r := []rune(nfd)
head := string(r[0])
tail := ""
for i := 1; i < len(r); i++ {
s := norm.NFC.String(head + string(r[i]))
if e0 := o.find(s); e0 != nil && e0.modified {
head = s
} else {
tail += string(r[i])
}
}
e.elems = append(o.genColElems(head), o.genColElems(tail)...)
}
}
}
// Exclude entries for which the individual runes generate the same collation elements.
for _, e := range o.ordered {
if len(e.runes) > 1 && equalCEArrays(o.genColElems(e.str), e.elems) {
e.exclude = true
}
}
}
func (b *Builder) buildOrdering(o *ordering) {
for _, e := range o.ordered {
o.getWeight(e)
}
for _, e := range o.ordered {
o.addExtension(e)
}
o.patchNorm()
o.sort()
simplify(o)
b.processExpansions(o) // requires simplify
b.processContractions(o) // requires simplify
t := newNode()
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if !e.skip() {
ce, err := e.encode()
b.errorID(o.id, err)
t.insert(e.runes[0], ce)
}
}
o.handle = b.index.addTrie(t)
}
func (b *Builder) build() (*table, error) {
if b.built {
return b.t, b.err
}
b.built = true
b.t = &table{
Table: colltab.Table{
MaxContractLen: utf8.UTFMax,
VariableTop: uint32(b.varTop),
},
}
b.buildOrdering(&b.root)
b.t.root = b.root.handle
for _, t := range b.locale {
b.buildOrdering(t.index)
if b.err != nil {
break
}
}
i, err := b.index.generate()
b.t.trie = *i
b.t.Index = colltab.Trie{
Index: i.index,
Values: i.values,
Index0: i.index[blockSize*b.t.root.lookupStart:],
Values0: i.values[blockSize*b.t.root.valueStart:],
}
b.error(err)
return b.t, b.err
}
// Build builds the root Collator.
func (b *Builder) Build() (colltab.Weighter, error) {
table, err := b.build()
if err != nil {
return nil, err
}
return table, nil
}
// Build builds a Collator for Tailoring t.
func (t *Tailoring) Build() (colltab.Weighter, error) {
// TODO: implement.
return nil, nil
}
// Print prints the tables for b and all its Tailorings as a Go file
// that can be included in the Collate package.
func (b *Builder) Print(w io.Writer) (n int, err error) {
p := func(nn int, e error) {
n += nn
if err == nil {
err = e
}
}
t, err := b.build()
if err != nil {
return 0, err
}
p(fmt.Fprintf(w, `var availableLocales = "und`))
for _, loc := range b.locale {
if loc.id != "und" {
p(fmt.Fprintf(w, ",%s", loc.id))
}
}
p(fmt.Fprint(w, "\"\n\n"))
p(fmt.Fprintf(w, "const varTop = 0x%x\n\n", b.varTop))
p(fmt.Fprintln(w, "var locales = [...]tableIndex{"))
for _, loc := range b.locale {
if loc.id == "und" {
p(t.fprintIndex(w, loc.index.handle, loc.id))
}
}
for _, loc := range b.locale {
if loc.id != "und" {
p(t.fprintIndex(w, loc.index.handle, loc.id))
}
}
p(fmt.Fprint(w, "}\n\n"))
n, _, err = t.fprint(w, "main")
return
}
// reproducibleFromNFKD checks whether the given expansion could be generated
// from an NFKD expansion.
func reproducibleFromNFKD(e *entry, exp, nfkd []rawCE) bool {
// Length must be equal.
if len(exp) != len(nfkd) {
return false
}
for i, ce := range exp {
// Primary and secondary values should be equal.
if ce.w[0] != nfkd[i].w[0] || ce.w[1] != nfkd[i].w[1] {
return false
}
// Tertiary values should be equal to maxTertiary for third element onwards.
// TODO: there seem to be a lot of cases in CLDR (e.g. ㏭ in zh.xml) that can
// simply be dropped. Try this out by dropping the following code.
if i >= 2 && ce.w[2] != maxTertiary {
return false
}
if _, err := makeCE(ce); err != nil {
// Simply return false. The error will be caught elsewhere.
return false
}
}
return true
}
func simplify(o *ordering) {
// Runes that are a starter of a contraction should not be removed.
// (To date, there is only Kannada character 0CCA.)
keep := make(map[rune]bool)
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if len(e.runes) > 1 {
keep[e.runes[0]] = true
}
}
// Tag entries for which the runes NFKD decompose to identical values.
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
s := e.str
nfkd := norm.NFKD.String(s)
nfd := norm.NFD.String(s)
if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == nfd {
continue
}
if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) {
e.decompose = true
}
}
}
// appendExpansion converts the given collation sequence to
// collation elements and adds them to the expansion table.
// It returns an index to the expansion table.
func (b *Builder) appendExpansion(e *entry) int {
t := b.t
i := len(t.ExpandElem)
ce := uint32(len(e.elems))
t.ExpandElem = append(t.ExpandElem, ce)
for _, w := range e.elems {
ce, err := makeCE(w)
if err != nil {
b.error(err)
return -1
}
t.ExpandElem = append(t.ExpandElem, ce)
}
return i
}
// processExpansions extracts data necessary to generate
// the extraction tables.
func (b *Builder) processExpansions(o *ordering) {
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if !e.expansion() {
continue
}
key := fmt.Sprintf("%v", e.elems)
i, ok := b.expIndex[key]
if !ok {
i = b.appendExpansion(e)
b.expIndex[key] = i
}
e.expansionIndex = i
}
}
func (b *Builder) processContractions(o *ordering) {
// Collate contractions per starter rune.
starters := []rune{}
cm := make(map[rune][]*entry)
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if e.contraction() {
if len(e.str) > b.t.MaxContractLen {
b.t.MaxContractLen = len(e.str)
}
r := e.runes[0]
if _, ok := cm[r]; !ok {
starters = append(starters, r)
}
cm[r] = append(cm[r], e)
}
}
// Add entries of single runes that are at a start of a contraction.
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if !e.contraction() {
r := e.runes[0]
if _, ok := cm[r]; ok {
cm[r] = append(cm[r], e)
}
}
}
// Build the tries for the contractions.
t := b.t
for _, r := range starters {
l := cm[r]
// Compute suffix strings. There are 31 different contraction suffix
// sets for 715 contractions and 82 contraction starter runes as of
// version 6.0.0.
sufx := []string{}
hasSingle := false
for _, e := range l {
if len(e.runes) > 1 {
sufx = append(sufx, string(e.runes[1:]))
} else {
hasSingle = true
}
}
if !hasSingle {
b.error(fmt.Errorf("no single entry for starter rune %U found", r))
continue
}
// Unique the suffix set.
sort.Strings(sufx)
key := strings.Join(sufx, "\n")
handle, ok := b.ctHandle[key]
if !ok {
var err error
handle, err = appendTrie(&t.ContractTries, sufx)
if err != nil {
b.error(err)
}
b.ctHandle[key] = handle
}
// Bucket sort entries in index order.
es := make([]*entry, len(l))
for _, e := range l {
var p, sn int
if len(e.runes) > 1 {
str := []byte(string(e.runes[1:]))
p, sn = lookup(&t.ContractTries, handle, str)
if sn != len(str) {
log.Fatalf("%s: processContractions: unexpected length for '%X'; len=%d; want %d", o.id, e.runes, sn, len(str))
}
}
if es[p] != nil {
log.Fatalf("%s: multiple contractions for position %d for rune %U", o.id, p, e.runes[0])
}
es[p] = e
}
// Create collation elements for contractions.
elems := []uint32{}
for _, e := range es {
ce, err := e.encodeBase()
b.errorID(o.id, err)
elems = append(elems, ce)
}
key = fmt.Sprintf("%v", elems)
i, ok := b.ctElem[key]
if !ok {
i = len(t.ContractElem)
b.ctElem[key] = i
t.ContractElem = append(t.ContractElem, elems...)
}
// Store info in entry for starter rune.
es[0].contractionIndex = i
es[0].contractionHandle = handle
}
}

294
vendor/golang.org/x/text/collate/build/colelem.go generated vendored Normal file
View file

@ -0,0 +1,294 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"fmt"
"unicode"
"golang.org/x/text/internal/colltab"
)
const (
defaultSecondary = 0x20
defaultTertiary = 0x2
maxTertiary = 0x1F
)
type rawCE struct {
w []int
ccc uint8
}
func makeRawCE(w []int, ccc uint8) rawCE {
ce := rawCE{w: make([]int, 4), ccc: ccc}
copy(ce.w, w)
return ce
}
// A collation element is represented as an uint32.
// In the typical case, a rune maps to a single collation element. If a rune
// can be the start of a contraction or expands into multiple collation elements,
// then the collation element that is associated with a rune will have a special
// form to represent such m to n mappings. Such special collation elements
// have a value >= 0x80000000.
const (
maxPrimaryBits = 21
maxSecondaryBits = 12
maxTertiaryBits = 8
)
func makeCE(ce rawCE) (uint32, error) {
v, e := colltab.MakeElem(ce.w[0], ce.w[1], ce.w[2], ce.ccc)
return uint32(v), e
}
// For contractions, collation elements are of the form
// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
// - n* is the size of the first node in the contraction trie.
// - i* is the index of the first node in the contraction trie.
// - b* is the offset into the contraction collation element table.
// See contract.go for details on the contraction trie.
const (
contractID = 0xC0000000
maxNBits = 4
maxTrieIndexBits = 12
maxContractOffsetBits = 13
)
func makeContractIndex(h ctHandle, offset int) (uint32, error) {
if h.n >= 1<<maxNBits {
return 0, fmt.Errorf("size of contraction trie node too large: %d >= %d", h.n, 1<<maxNBits)
}
if h.index >= 1<<maxTrieIndexBits {
return 0, fmt.Errorf("size of contraction trie offset too large: %d >= %d", h.index, 1<<maxTrieIndexBits)
}
if offset >= 1<<maxContractOffsetBits {
return 0, fmt.Errorf("contraction offset out of bounds: %x >= %x", offset, 1<<maxContractOffsetBits)
}
ce := uint32(contractID)
ce += uint32(offset << (maxNBits + maxTrieIndexBits))
ce += uint32(h.index << maxNBits)
ce += uint32(h.n)
return ce, nil
}
// For expansions, collation elements are of the form
// 11100000 00000000 bbbbbbbb bbbbbbbb,
// where b* is the index into the expansion sequence table.
const (
expandID = 0xE0000000
maxExpandIndexBits = 16
)
func makeExpandIndex(index int) (uint32, error) {
if index >= 1<<maxExpandIndexBits {
return 0, fmt.Errorf("expansion index out of bounds: %x >= %x", index, 1<<maxExpandIndexBits)
}
return expandID + uint32(index), nil
}
// Each list of collation elements corresponding to an expansion starts with
// a header indicating the length of the sequence.
func makeExpansionHeader(n int) (uint32, error) {
return uint32(n), nil
}
// Some runes can be expanded using NFKD decomposition. Instead of storing the full
// sequence of collation elements, we decompose the rune and lookup the collation
// elements for each rune in the decomposition and modify the tertiary weights.
// The collation element, in this case, is of the form
// 11110000 00000000 wwwwwwww vvvvvvvv, where
// - v* is the replacement tertiary weight for the first rune,
// - w* is the replacement tertiary weight for the second rune,
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
// See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
const (
decompID = 0xF0000000
)
func makeDecompose(t1, t2 int) (uint32, error) {
if t1 >= 256 || t1 < 0 {
return 0, fmt.Errorf("first tertiary weight out of bounds: %d >= 256", t1)
}
if t2 >= 256 || t2 < 0 {
return 0, fmt.Errorf("second tertiary weight out of bounds: %d >= 256", t2)
}
return uint32(t2<<8+t1) + decompID, nil
}
const (
// These constants were taken from https://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
minUnified rune = 0x4E00
maxUnified = 0x9FFF
minCompatibility = 0xF900
maxCompatibility = 0xFAFF
minRare = 0x3400
maxRare = 0x4DBF
)
const (
commonUnifiedOffset = 0x10000
rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF
otherOffset = 0x50000 // largest rune in rare is U+2FA1D
illegalOffset = otherOffset + int(unicode.MaxRune)
maxPrimary = illegalOffset + 1
)
// implicitPrimary returns the primary weight for the a rune
// for which there is no entry for the rune in the collation table.
// We take a different approach from the one specified in
// https://unicode.org/reports/tr10/#Implicit_Weights,
// but preserve the resulting relative ordering of the runes.
func implicitPrimary(r rune) int {
if unicode.Is(unicode.Ideographic, r) {
if r >= minUnified && r <= maxUnified {
// The most common case for CJK.
return int(r) + commonUnifiedOffset
}
if r >= minCompatibility && r <= maxCompatibility {
// This will typically not hit. The DUCET explicitly specifies mappings
// for all characters that do not decompose.
return int(r) + commonUnifiedOffset
}
return int(r) + rareUnifiedOffset
}
return int(r) + otherOffset
}
// convertLargeWeights converts collation elements with large
// primaries (either double primaries or for illegal runes)
// to our own representation.
// A CJK character C is represented in the DUCET as
// [.FBxx.0020.0002.C][.BBBB.0000.0000.C]
// We will rewrite these characters to a single CE.
// We assume the CJK values start at 0x8000.
// See https://unicode.org/reports/tr10/#Implicit_Weights
func convertLargeWeights(elems []rawCE) (res []rawCE, err error) {
const (
cjkPrimaryStart = 0xFB40
rarePrimaryStart = 0xFB80
otherPrimaryStart = 0xFBC0
illegalPrimary = 0xFFFE
highBitsMask = 0x3F
lowBitsMask = 0x7FFF
lowBitsFlag = 0x8000
shiftBits = 15
)
for i := 0; i < len(elems); i++ {
ce := elems[i].w
p := ce[0]
if p < cjkPrimaryStart {
continue
}
if p > 0xFFFF {
return elems, fmt.Errorf("found primary weight %X; should be <= 0xFFFF", p)
}
if p >= illegalPrimary {
ce[0] = illegalOffset + p - illegalPrimary
} else {
if i+1 >= len(elems) {
return elems, fmt.Errorf("second part of double primary weight missing: %v", elems)
}
if elems[i+1].w[0]&lowBitsFlag == 0 {
return elems, fmt.Errorf("malformed second part of double primary weight: %v", elems)
}
np := ((p & highBitsMask) << shiftBits) + elems[i+1].w[0]&lowBitsMask
switch {
case p < rarePrimaryStart:
np += commonUnifiedOffset
case p < otherPrimaryStart:
np += rareUnifiedOffset
default:
p += otherOffset
}
ce[0] = np
for j := i + 1; j+1 < len(elems); j++ {
elems[j] = elems[j+1]
}
elems = elems[:len(elems)-1]
}
}
return elems, nil
}
// nextWeight computes the first possible collation weights following elems
// for the given level.
func nextWeight(level colltab.Level, elems []rawCE) []rawCE {
if level == colltab.Identity {
next := make([]rawCE, len(elems))
copy(next, elems)
return next
}
next := []rawCE{makeRawCE(elems[0].w, elems[0].ccc)}
next[0].w[level]++
if level < colltab.Secondary {
next[0].w[colltab.Secondary] = defaultSecondary
}
if level < colltab.Tertiary {
next[0].w[colltab.Tertiary] = defaultTertiary
}
// Filter entries that cannot influence ordering.
for _, ce := range elems[1:] {
skip := true
for i := colltab.Primary; i < level; i++ {
skip = skip && ce.w[i] == 0
}
if !skip {
next = append(next, ce)
}
}
return next
}
func nextVal(elems []rawCE, i int, level colltab.Level) (index, value int) {
for ; i < len(elems) && elems[i].w[level] == 0; i++ {
}
if i < len(elems) {
return i, elems[i].w[level]
}
return i, 0
}
// compareWeights returns -1 if a < b, 1 if a > b, or 0 otherwise.
// It also returns the collation level at which the difference is found.
func compareWeights(a, b []rawCE) (result int, level colltab.Level) {
for level := colltab.Primary; level < colltab.Identity; level++ {
var va, vb int
for ia, ib := 0, 0; ia < len(a) || ib < len(b); ia, ib = ia+1, ib+1 {
ia, va = nextVal(a, ia, level)
ib, vb = nextVal(b, ib, level)
if va != vb {
if va < vb {
return -1, level
} else {
return 1, level
}
}
}
}
return 0, colltab.Identity
}
func equalCE(a, b rawCE) bool {
for i := 0; i < 3; i++ {
if b.w[i] != a.w[i] {
return false
}
}
return true
}
func equalCEArrays(a, b []rawCE) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if !equalCE(a[i], b[i]) {
return false
}
}
return true
}

309
vendor/golang.org/x/text/collate/build/contract.go generated vendored Normal file
View file

@ -0,0 +1,309 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"fmt"
"io"
"reflect"
"sort"
"strings"
"golang.org/x/text/internal/colltab"
)
// This file contains code for detecting contractions and generating
// the necessary tables.
// Any Unicode Collation Algorithm (UCA) table entry that has more than
// one rune one the left-hand side is called a contraction.
// See https://www.unicode.org/reports/tr10/#Contractions for more details.
//
// We define the following terms:
// initial: a rune that appears as the first rune in a contraction.
// suffix: a sequence of runes succeeding the initial rune
// in a given contraction.
// non-initial: a rune that appears in a suffix.
//
// A rune may be both an initial and a non-initial and may be so in
// many contractions. An initial may typically also appear by itself.
// In case of ambiguities, the UCA requires we match the longest
// contraction.
//
// Many contraction rules share the same set of possible suffixes.
// We store sets of suffixes in a trie that associates an index with
// each suffix in the set. This index can be used to look up a
// collation element associated with the (starter rune, suffix) pair.
//
// The trie is defined on a UTF-8 byte sequence.
// The overall trie is represented as an array of ctEntries. Each node of the trie
// is represented as a subsequence of ctEntries, where each entry corresponds to
// a possible match of a next character in the search string. An entry
// also includes the length and offset to the next sequence of entries
// to check in case of a match.
const (
final = 0
noIndex = 0xFF
)
// ctEntry associates to a matching byte an offset and/or next sequence of
// bytes to check. A ctEntry c is called final if a match means that the
// longest suffix has been found. An entry c is final if c.N == 0.
// A single final entry can match a range of characters to an offset.
// A non-final entry always matches a single byte. Note that a non-final
// entry might still resemble a completed suffix.
// Examples:
// The suffix strings "ab" and "ac" can be represented as:
// []ctEntry{
// {'a', 1, 1, noIndex}, // 'a' by itself does not match, so i is 0xFF.
// {'b', 'c', 0, 1}, // "ab" -> 1, "ac" -> 2
// }
//
// The suffix strings "ab", "abc", "abd", and "abcd" can be represented as:
// []ctEntry{
// {'a', 1, 1, noIndex}, // 'a' must be followed by 'b'.
// {'b', 1, 2, 1}, // "ab" -> 1, may be followed by 'c' or 'd'.
// {'d', 'd', final, 3}, // "abd" -> 3
// {'c', 4, 1, 2}, // "abc" -> 2, may be followed by 'd'.
// {'d', 'd', final, 4}, // "abcd" -> 4
// }
// See genStateTests in contract_test.go for more examples.
type ctEntry struct {
L uint8 // non-final: byte value to match; final: lowest match in range.
H uint8 // non-final: relative index to next block; final: highest match in range.
N uint8 // non-final: length of next block; final: final
I uint8 // result offset. Will be noIndex if more bytes are needed to complete.
}
// contractTrieSet holds a set of contraction tries. The tries are stored
// consecutively in the entry field.
type contractTrieSet []struct{ l, h, n, i uint8 }
// ctHandle is used to identify a trie in the trie set, consisting in an offset
// in the array and the size of the first node.
type ctHandle struct {
index, n int
}
// appendTrie adds a new trie for the given suffixes to the trie set and returns
// a handle to it. The handle will be invalid on error.
func appendTrie(ct *colltab.ContractTrieSet, suffixes []string) (ctHandle, error) {
es := make([]stridx, len(suffixes))
for i, s := range suffixes {
es[i].str = s
}
sort.Sort(offsetSort(es))
for i := range es {
es[i].index = i + 1
}
sort.Sort(genidxSort(es))
i := len(*ct)
n, err := genStates(ct, es)
if err != nil {
*ct = (*ct)[:i]
return ctHandle{}, err
}
return ctHandle{i, n}, nil
}
// genStates generates ctEntries for a given suffix set and returns
// the number of entries for the first node.
func genStates(ct *colltab.ContractTrieSet, sis []stridx) (int, error) {
if len(sis) == 0 {
return 0, fmt.Errorf("genStates: list of suffices must be non-empty")
}
start := len(*ct)
// create entries for differing first bytes.
for _, si := range sis {
s := si.str
if len(s) == 0 {
continue
}
added := false
c := s[0]
if len(s) > 1 {
for j := len(*ct) - 1; j >= start; j-- {
if (*ct)[j].L == c {
added = true
break
}
}
if !added {
*ct = append(*ct, ctEntry{L: c, I: noIndex})
}
} else {
for j := len(*ct) - 1; j >= start; j-- {
// Update the offset for longer suffixes with the same byte.
if (*ct)[j].L == c {
(*ct)[j].I = uint8(si.index)
added = true
}
// Extend range of final ctEntry, if possible.
if (*ct)[j].H+1 == c {
(*ct)[j].H = c
added = true
}
}
if !added {
*ct = append(*ct, ctEntry{L: c, H: c, N: final, I: uint8(si.index)})
}
}
}
n := len(*ct) - start
// Append nodes for the remainder of the suffixes for each ctEntry.
sp := 0
for i, end := start, len(*ct); i < end; i++ {
fe := (*ct)[i]
if fe.H == 0 { // uninitialized non-final
ln := len(*ct) - start - n
if ln > 0xFF {
return 0, fmt.Errorf("genStates: relative block offset too large: %d > 255", ln)
}
fe.H = uint8(ln)
// Find first non-final strings with same byte as current entry.
for ; sis[sp].str[0] != fe.L; sp++ {
}
se := sp + 1
for ; se < len(sis) && len(sis[se].str) > 1 && sis[se].str[0] == fe.L; se++ {
}
sl := sis[sp:se]
sp = se
for i, si := range sl {
sl[i].str = si.str[1:]
}
nn, err := genStates(ct, sl)
if err != nil {
return 0, err
}
fe.N = uint8(nn)
(*ct)[i] = fe
}
}
sort.Sort(entrySort((*ct)[start : start+n]))
return n, nil
}
// There may be both a final and non-final entry for a byte if the byte
// is implied in a range of matches in the final entry.
// We need to ensure that the non-final entry comes first in that case.
type entrySort colltab.ContractTrieSet
func (fe entrySort) Len() int { return len(fe) }
func (fe entrySort) Swap(i, j int) { fe[i], fe[j] = fe[j], fe[i] }
func (fe entrySort) Less(i, j int) bool {
return fe[i].L > fe[j].L
}
// stridx is used for sorting suffixes and their associated offsets.
type stridx struct {
str string
index int
}
// For computing the offsets, we first sort by size, and then by string.
// This ensures that strings that only differ in the last byte by 1
// are sorted consecutively in increasing order such that they can
// be packed as a range in a final ctEntry.
type offsetSort []stridx
func (si offsetSort) Len() int { return len(si) }
func (si offsetSort) Swap(i, j int) { si[i], si[j] = si[j], si[i] }
func (si offsetSort) Less(i, j int) bool {
if len(si[i].str) != len(si[j].str) {
return len(si[i].str) > len(si[j].str)
}
return si[i].str < si[j].str
}
// For indexing, we want to ensure that strings are sorted in string order, where
// for strings with the same prefix, we put longer strings before shorter ones.
type genidxSort []stridx
func (si genidxSort) Len() int { return len(si) }
func (si genidxSort) Swap(i, j int) { si[i], si[j] = si[j], si[i] }
func (si genidxSort) Less(i, j int) bool {
if strings.HasPrefix(si[j].str, si[i].str) {
return false
}
if strings.HasPrefix(si[i].str, si[j].str) {
return true
}
return si[i].str < si[j].str
}
// lookup matches the longest suffix in str and returns the associated offset
// and the number of bytes consumed.
func lookup(ct *colltab.ContractTrieSet, h ctHandle, str []byte) (index, ns int) {
states := (*ct)[h.index:]
p := 0
n := h.n
for i := 0; i < n && p < len(str); {
e := states[i]
c := str[p]
if c >= e.L {
if e.L == c {
p++
if e.I != noIndex {
index, ns = int(e.I), p
}
if e.N != final {
// set to new state
i, states, n = 0, states[int(e.H)+n:], int(e.N)
} else {
return
}
continue
} else if e.N == final && c <= e.H {
p++
return int(c-e.L) + int(e.I), p
}
}
i++
}
return
}
// print writes the contractTrieSet t as compilable Go code to w. It returns
// the total number of bytes written and the size of the resulting data structure in bytes.
func print(t *colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
update3 := func(nn, sz int, e error) {
n += nn
if err == nil {
err = e
}
size += sz
}
update2 := func(nn int, e error) { update3(nn, 0, e) }
update3(printArray(*t, w, name))
update2(fmt.Fprintf(w, "var %sContractTrieSet = ", name))
update3(printStruct(*t, w, name))
update2(fmt.Fprintln(w))
return
}
func printArray(ct colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
n += nn
if err == nil {
err = e
}
}
size = len(ct) * 4
p("// %sCTEntries: %d entries, %d bytes\n", name, len(ct), size)
p("var %sCTEntries = [%d]struct{L,H,N,I uint8}{\n", name, len(ct))
for _, fe := range ct {
p("\t{0x%X, 0x%X, %d, %d},\n", fe.L, fe.H, fe.N, fe.I)
}
p("}\n")
return
}
func printStruct(ct colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
n, err = fmt.Fprintf(w, "colltab.ContractTrieSet( %sCTEntries[:] )", name)
size = int(reflect.TypeOf(ct).Size())
return
}

393
vendor/golang.org/x/text/collate/build/order.go generated vendored Normal file
View file

@ -0,0 +1,393 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"fmt"
"log"
"sort"
"strings"
"unicode"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/unicode/norm"
)
type logicalAnchor int
const (
firstAnchor logicalAnchor = -1
noAnchor = 0
lastAnchor = 1
)
// entry is used to keep track of a single entry in the collation element table
// during building. Examples of entries can be found in the Default Unicode
// Collation Element Table.
// See https://www.unicode.org/Public/UCA/6.0.0/allkeys.txt.
type entry struct {
str string // same as string(runes)
runes []rune
elems []rawCE // the collation elements
extend string // weights of extend to be appended to elems
before bool // weights relative to next instead of previous.
lock bool // entry is used in extension and can no longer be moved.
// prev, next, and level are used to keep track of tailorings.
prev, next *entry
level colltab.Level // next differs at this level
skipRemove bool // do not unlink when removed
decompose bool // can use NFKD decomposition to generate elems
exclude bool // do not include in table
implicit bool // derived, is not included in the list
modified bool // entry was modified in tailoring
logical logicalAnchor
expansionIndex int // used to store index into expansion table
contractionHandle ctHandle
contractionIndex int // index into contraction elements
}
func (e *entry) String() string {
return fmt.Sprintf("%X (%q) -> %X (ch:%x; ci:%d, ei:%d)",
e.runes, e.str, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex)
}
func (e *entry) skip() bool {
return e.contraction()
}
func (e *entry) expansion() bool {
return !e.decompose && len(e.elems) > 1
}
func (e *entry) contraction() bool {
return len(e.runes) > 1
}
func (e *entry) contractionStarter() bool {
return e.contractionHandle.n != 0
}
// nextIndexed gets the next entry that needs to be stored in the table.
// It returns the entry and the collation level at which the next entry differs
// from the current entry.
// Entries that can be explicitly derived and logical reset positions are
// examples of entries that will not be indexed.
func (e *entry) nextIndexed() (*entry, colltab.Level) {
level := e.level
for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next {
if e.level < level {
level = e.level
}
}
return e, level
}
// remove unlinks entry e from the sorted chain and clears the collation
// elements. e may not be at the front or end of the list. This should always
// be the case, as the front and end of the list are always logical anchors,
// which may not be removed.
func (e *entry) remove() {
if e.logical != noAnchor {
log.Fatalf("may not remove anchor %q", e.str)
}
// TODO: need to set e.prev.level to e.level if e.level is smaller?
e.elems = nil
if !e.skipRemove {
if e.prev != nil {
e.prev.next = e.next
}
if e.next != nil {
e.next.prev = e.prev
}
}
e.skipRemove = false
}
// insertAfter inserts n after e.
func (e *entry) insertAfter(n *entry) {
if e == n {
panic("e == anchor")
}
if e == nil {
panic("unexpected nil anchor")
}
n.remove()
n.decompose = false // redo decomposition test
n.next = e.next
n.prev = e
if e.next != nil {
e.next.prev = n
}
e.next = n
}
// insertBefore inserts n before e.
func (e *entry) insertBefore(n *entry) {
if e == n {
panic("e == anchor")
}
if e == nil {
panic("unexpected nil anchor")
}
n.remove()
n.decompose = false // redo decomposition test
n.prev = e.prev
n.next = e
if e.prev != nil {
e.prev.next = n
}
e.prev = n
}
func (e *entry) encodeBase() (ce uint32, err error) {
switch {
case e.expansion():
ce, err = makeExpandIndex(e.expansionIndex)
default:
if e.decompose {
log.Fatal("decompose should be handled elsewhere")
}
ce, err = makeCE(e.elems[0])
}
return
}
func (e *entry) encode() (ce uint32, err error) {
if e.skip() {
log.Fatal("cannot build colElem for entry that should be skipped")
}
switch {
case e.decompose:
t1 := e.elems[0].w[2]
t2 := 0
if len(e.elems) > 1 {
t2 = e.elems[1].w[2]
}
ce, err = makeDecompose(t1, t2)
case e.contractionStarter():
ce, err = makeContractIndex(e.contractionHandle, e.contractionIndex)
default:
if len(e.runes) > 1 {
log.Fatal("colElem: contractions are handled in contraction trie")
}
ce, err = e.encodeBase()
}
return
}
// entryLess returns true if a sorts before b and false otherwise.
func entryLess(a, b *entry) bool {
if res, _ := compareWeights(a.elems, b.elems); res != 0 {
return res == -1
}
if a.logical != noAnchor {
return a.logical == firstAnchor
}
if b.logical != noAnchor {
return b.logical == lastAnchor
}
return a.str < b.str
}
type sortedEntries []*entry
func (s sortedEntries) Len() int {
return len(s)
}
func (s sortedEntries) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
func (s sortedEntries) Less(i, j int) bool {
return entryLess(s[i], s[j])
}
type ordering struct {
id string
entryMap map[string]*entry
ordered []*entry
handle *trieHandle
}
// insert inserts e into both entryMap and ordered.
// Note that insert simply appends e to ordered. To reattain a sorted
// order, o.sort() should be called.
func (o *ordering) insert(e *entry) {
if e.logical == noAnchor {
o.entryMap[e.str] = e
} else {
// Use key format as used in UCA rules.
o.entryMap[fmt.Sprintf("[%s]", e.str)] = e
// Also add index entry for XML format.
o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(e.str, " ", "_", -1))] = e
}
o.ordered = append(o.ordered, e)
}
// newEntry creates a new entry for the given info and inserts it into
// the index.
func (o *ordering) newEntry(s string, ces []rawCE) *entry {
e := &entry{
runes: []rune(s),
elems: ces,
str: s,
}
o.insert(e)
return e
}
// find looks up and returns the entry for the given string.
// It returns nil if str is not in the index and if an implicit value
// cannot be derived, that is, if str represents more than one rune.
func (o *ordering) find(str string) *entry {
e := o.entryMap[str]
if e == nil {
r := []rune(str)
if len(r) == 1 {
const (
firstHangul = 0xAC00
lastHangul = 0xD7A3
)
if r[0] >= firstHangul && r[0] <= lastHangul {
ce := []rawCE{}
nfd := norm.NFD.String(str)
for _, r := range nfd {
ce = append(ce, o.find(string(r)).elems...)
}
e = o.newEntry(nfd, ce)
} else {
e = o.newEntry(string(r[0]), []rawCE{
{w: []int{
implicitPrimary(r[0]),
defaultSecondary,
defaultTertiary,
int(r[0]),
},
},
})
e.modified = true
}
e.exclude = true // do not index implicits
}
}
return e
}
// makeRootOrdering returns a newly initialized ordering value and populates
// it with a set of logical reset points that can be used as anchors.
// The anchors first_tertiary_ignorable and __END__ will always sort at
// the beginning and end, respectively. This means that prev and next are non-nil
// for any indexed entry.
func makeRootOrdering() ordering {
const max = unicode.MaxRune
o := ordering{
entryMap: make(map[string]*entry),
}
insert := func(typ logicalAnchor, s string, ce []int) {
e := &entry{
elems: []rawCE{{w: ce}},
str: s,
exclude: true,
logical: typ,
}
o.insert(e)
}
insert(firstAnchor, "first tertiary ignorable", []int{0, 0, 0, 0})
insert(lastAnchor, "last tertiary ignorable", []int{0, 0, 0, max})
insert(lastAnchor, "last primary ignorable", []int{0, defaultSecondary, defaultTertiary, max})
insert(lastAnchor, "last non ignorable", []int{maxPrimary, defaultSecondary, defaultTertiary, max})
insert(lastAnchor, "__END__", []int{1 << maxPrimaryBits, defaultSecondary, defaultTertiary, max})
return o
}
// patchForInsert eleminates entries from the list with more than one collation element.
// The next and prev fields of the eliminated entries still point to appropriate
// values in the newly created list.
// It requires that sort has been called.
func (o *ordering) patchForInsert() {
for i := 0; i < len(o.ordered)-1; {
e := o.ordered[i]
lev := e.level
n := e.next
for ; n != nil && len(n.elems) > 1; n = n.next {
if n.level < lev {
lev = n.level
}
n.skipRemove = true
}
for ; o.ordered[i] != n; i++ {
o.ordered[i].level = lev
o.ordered[i].next = n
o.ordered[i+1].prev = e
}
}
}
// clone copies all ordering of es into a new ordering value.
func (o *ordering) clone() *ordering {
o.sort()
oo := ordering{
entryMap: make(map[string]*entry),
}
for _, e := range o.ordered {
ne := &entry{
runes: e.runes,
elems: e.elems,
str: e.str,
decompose: e.decompose,
exclude: e.exclude,
logical: e.logical,
}
oo.insert(ne)
}
oo.sort() // link all ordering.
oo.patchForInsert()
return &oo
}
// front returns the first entry to be indexed.
// It assumes that sort() has been called.
func (o *ordering) front() *entry {
e := o.ordered[0]
if e.prev != nil {
log.Panicf("unexpected first entry: %v", e)
}
// The first entry is always a logical position, which should not be indexed.
e, _ = e.nextIndexed()
return e
}
// sort sorts all ordering based on their collation elements and initializes
// the prev, next, and level fields accordingly.
func (o *ordering) sort() {
sort.Sort(sortedEntries(o.ordered))
l := o.ordered
for i := 1; i < len(l); i++ {
k := i - 1
l[k].next = l[i]
_, l[k].level = compareWeights(l[k].elems, l[i].elems)
l[i].prev = l[k]
}
}
// genColElems generates a collation element array from the runes in str. This
// assumes that all collation elements have already been added to the Builder.
func (o *ordering) genColElems(str string) []rawCE {
elems := []rawCE{}
for _, r := range []rune(str) {
for _, ce := range o.find(string(r)).elems {
if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 {
elems = append(elems, ce)
}
}
}
return elems
}

81
vendor/golang.org/x/text/collate/build/table.go generated vendored Normal file
View file

@ -0,0 +1,81 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"fmt"
"io"
"reflect"
"golang.org/x/text/internal/colltab"
)
// table is an intermediate structure that roughly resembles the table in collate.
type table struct {
colltab.Table
trie trie
root *trieHandle
}
// print writes the table as Go compilable code to w. It prefixes the
// variable names with name. It returns the number of bytes written
// and the size of the resulting table.
func (t *table) fprint(w io.Writer, name string) (n, size int, err error) {
update := func(nn, sz int, e error) {
n += nn
if err == nil {
err = e
}
size += sz
}
// Write arrays needed for the structure.
update(printColElems(w, t.ExpandElem, name+"ExpandElem"))
update(printColElems(w, t.ContractElem, name+"ContractElem"))
update(t.trie.printArrays(w, name))
update(printArray(t.ContractTries, w, name))
nn, e := fmt.Fprintf(w, "// Total size of %sTable is %d bytes\n", name, size)
update(nn, 0, e)
return
}
func (t *table) fprintIndex(w io.Writer, h *trieHandle, id string) (n int, err error) {
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
n += nn
if err == nil {
err = e
}
}
p("\t{ // %s\n", id)
p("\t\tlookupOffset: 0x%x,\n", h.lookupStart)
p("\t\tvaluesOffset: 0x%x,\n", h.valueStart)
p("\t},\n")
return
}
func printColElems(w io.Writer, a []uint32, name string) (n, sz int, err error) {
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
n += nn
if err == nil {
err = e
}
}
sz = len(a) * int(reflect.TypeOf(uint32(0)).Size())
p("// %s: %d entries, %d bytes\n", name, len(a), sz)
p("var %s = [%d]uint32 {", name, len(a))
for i, c := range a {
switch {
case i%64 == 0:
p("\n\t// Block %d, offset 0x%x\n", i/64, i)
case (i%64)%6 == 0:
p("\n\t")
}
p("0x%.8X, ", c)
}
p("\n}\n\n")
return
}

290
vendor/golang.org/x/text/collate/build/trie.go generated vendored Normal file
View file

@ -0,0 +1,290 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The trie in this file is used to associate the first full character
// in a UTF-8 string to a collation element.
// All but the last byte in a UTF-8 byte sequence are
// used to look up offsets in the index table to be used for the next byte.
// The last byte is used to index into a table of collation elements.
// This file contains the code for the generation of the trie.
package build
import (
"fmt"
"hash/fnv"
"io"
"reflect"
)
const (
blockSize = 64
blockOffset = 2 // Subtract 2 blocks to compensate for the 0x80 added to continuation bytes.
)
type trieHandle struct {
lookupStart uint16 // offset in table for first byte
valueStart uint16 // offset in table for first byte
}
type trie struct {
index []uint16
values []uint32
}
// trieNode is the intermediate trie structure used for generating a trie.
type trieNode struct {
index []*trieNode
value []uint32
b byte
refValue uint16
refIndex uint16
}
func newNode() *trieNode {
return &trieNode{
index: make([]*trieNode, 64),
value: make([]uint32, 128), // root node size is 128 instead of 64
}
}
func (n *trieNode) isInternal() bool {
return n.value != nil
}
func (n *trieNode) insert(r rune, value uint32) {
const maskx = 0x3F // mask out two most-significant bits
str := string(r)
if len(str) == 1 {
n.value[str[0]] = value
return
}
for i := 0; i < len(str)-1; i++ {
b := str[i] & maskx
if n.index == nil {
n.index = make([]*trieNode, blockSize)
}
nn := n.index[b]
if nn == nil {
nn = &trieNode{}
nn.b = b
n.index[b] = nn
}
n = nn
}
if n.value == nil {
n.value = make([]uint32, blockSize)
}
b := str[len(str)-1] & maskx
n.value[b] = value
}
type trieBuilder struct {
t *trie
roots []*trieHandle
lookupBlocks []*trieNode
valueBlocks []*trieNode
lookupBlockIdx map[uint32]*trieNode
valueBlockIdx map[uint32]*trieNode
}
func newTrieBuilder() *trieBuilder {
index := &trieBuilder{}
index.lookupBlocks = make([]*trieNode, 0)
index.valueBlocks = make([]*trieNode, 0)
index.lookupBlockIdx = make(map[uint32]*trieNode)
index.valueBlockIdx = make(map[uint32]*trieNode)
// The third nil is the default null block. The other two blocks
// are used to guarantee an offset of at least 3 for each block.
index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil)
index.t = &trie{}
return index
}
func (b *trieBuilder) computeOffsets(n *trieNode) *trieNode {
hasher := fnv.New32()
if n.index != nil {
for i, nn := range n.index {
var vi, vv uint16
if nn != nil {
nn = b.computeOffsets(nn)
n.index[i] = nn
vi = nn.refIndex
vv = nn.refValue
}
hasher.Write([]byte{byte(vi >> 8), byte(vi)})
hasher.Write([]byte{byte(vv >> 8), byte(vv)})
}
h := hasher.Sum32()
nn, ok := b.lookupBlockIdx[h]
if !ok {
n.refIndex = uint16(len(b.lookupBlocks)) - blockOffset
b.lookupBlocks = append(b.lookupBlocks, n)
b.lookupBlockIdx[h] = n
} else {
n = nn
}
} else {
for _, v := range n.value {
hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
}
h := hasher.Sum32()
nn, ok := b.valueBlockIdx[h]
if !ok {
n.refValue = uint16(len(b.valueBlocks)) - blockOffset
n.refIndex = n.refValue
b.valueBlocks = append(b.valueBlocks, n)
b.valueBlockIdx[h] = n
} else {
n = nn
}
}
return n
}
func (b *trieBuilder) addStartValueBlock(n *trieNode) uint16 {
hasher := fnv.New32()
for _, v := range n.value[:2*blockSize] {
hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
}
h := hasher.Sum32()
nn, ok := b.valueBlockIdx[h]
if !ok {
n.refValue = uint16(len(b.valueBlocks))
n.refIndex = n.refValue
b.valueBlocks = append(b.valueBlocks, n)
// Add a dummy block to accommodate the double block size.
b.valueBlocks = append(b.valueBlocks, nil)
b.valueBlockIdx[h] = n
} else {
n = nn
}
return n.refValue
}
func genValueBlock(t *trie, n *trieNode) {
if n != nil {
for _, v := range n.value {
t.values = append(t.values, v)
}
}
}
func genLookupBlock(t *trie, n *trieNode) {
for _, nn := range n.index {
v := uint16(0)
if nn != nil {
if n.index != nil {
v = nn.refIndex
} else {
v = nn.refValue
}
}
t.index = append(t.index, v)
}
}
func (b *trieBuilder) addTrie(n *trieNode) *trieHandle {
h := &trieHandle{}
b.roots = append(b.roots, h)
h.valueStart = b.addStartValueBlock(n)
if len(b.roots) == 1 {
// We insert a null block after the first start value block.
// This ensures that continuation bytes UTF-8 sequences of length
// greater than 2 will automatically hit a null block if there
// was an undefined entry.
b.valueBlocks = append(b.valueBlocks, nil)
}
n = b.computeOffsets(n)
// Offset by one extra block as the first byte starts at 0xC0 instead of 0x80.
h.lookupStart = n.refIndex - 1
return h
}
// generate generates and returns the trie for n.
func (b *trieBuilder) generate() (t *trie, err error) {
t = b.t
if len(b.valueBlocks) >= 1<<16 {
return nil, fmt.Errorf("maximum number of value blocks exceeded (%d > %d)", len(b.valueBlocks), 1<<16)
}
if len(b.lookupBlocks) >= 1<<16 {
return nil, fmt.Errorf("maximum number of lookup blocks exceeded (%d > %d)", len(b.lookupBlocks), 1<<16)
}
genValueBlock(t, b.valueBlocks[0])
genValueBlock(t, &trieNode{value: make([]uint32, 64)})
for i := 2; i < len(b.valueBlocks); i++ {
genValueBlock(t, b.valueBlocks[i])
}
n := &trieNode{index: make([]*trieNode, 64)}
genLookupBlock(t, n)
genLookupBlock(t, n)
genLookupBlock(t, n)
for i := 3; i < len(b.lookupBlocks); i++ {
genLookupBlock(t, b.lookupBlocks[i])
}
return b.t, nil
}
func (t *trie) printArrays(w io.Writer, name string) (n, size int, err error) {
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
n += nn
if err == nil {
err = e
}
}
nv := len(t.values)
p("// %sValues: %d entries, %d bytes\n", name, nv, nv*4)
p("// Block 2 is the null block.\n")
p("var %sValues = [%d]uint32 {", name, nv)
var printnewline bool
for i, v := range t.values {
if i%blockSize == 0 {
p("\n\t// Block %#x, offset %#x", i/blockSize, i)
}
if i%4 == 0 {
printnewline = true
}
if v != 0 {
if printnewline {
p("\n\t")
printnewline = false
}
p("%#04x:%#08x, ", i, v)
}
}
p("\n}\n\n")
ni := len(t.index)
p("// %sLookup: %d entries, %d bytes\n", name, ni, ni*2)
p("// Block 0 is the null block.\n")
p("var %sLookup = [%d]uint16 {", name, ni)
printnewline = false
for i, v := range t.index {
if i%blockSize == 0 {
p("\n\t// Block %#x, offset %#x", i/blockSize, i)
}
if i%8 == 0 {
printnewline = true
}
if v != 0 {
if printnewline {
p("\n\t")
printnewline = false
}
p("%#03x:%#02x, ", i, v)
}
}
p("\n}\n\n")
return n, nv*4 + ni*2, err
}
func (t *trie) printStruct(w io.Writer, handle *trieHandle, name string) (n, sz int, err error) {
const msg = "trie{ %sLookup[%d:], %sValues[%d:], %sLookup[:], %sValues[:]}"
n, err = fmt.Fprintf(w, msg, name, handle.lookupStart*blockSize, name, handle.valueStart*blockSize, name, name)
sz += int(reflect.TypeOf(trie{}).Size())
return
}

403
vendor/golang.org/x/text/collate/collate.go generated vendored Normal file
View file

@ -0,0 +1,403 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// TODO: remove hard-coded versions when we have implemented fractional weights.
// The current implementation is incompatible with later CLDR versions.
//go:generate go run maketables.go -cldr=23 -unicode=6.2.0
// Package collate contains types for comparing and sorting Unicode strings
// according to a given collation order.
package collate // import "golang.org/x/text/collate"
import (
"bytes"
"strings"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/language"
)
// Collator provides functionality for comparing strings for a given
// collation order.
type Collator struct {
options
sorter sorter
_iter [2]iter
}
func (c *Collator) iter(i int) *iter {
// TODO: evaluate performance for making the second iterator optional.
return &c._iter[i]
}
// Supported returns the list of languages for which collating differs from its parent.
func Supported() []language.Tag {
// TODO: use language.Coverage instead.
t := make([]language.Tag, len(tags))
copy(t, tags)
return t
}
func init() {
ids := strings.Split(availableLocales, ",")
tags = make([]language.Tag, len(ids))
for i, s := range ids {
tags[i] = language.Raw.MustParse(s)
}
}
var tags []language.Tag
// New returns a new Collator initialized for the given locale.
func New(t language.Tag, o ...Option) *Collator {
index := colltab.MatchLang(t, tags)
c := newCollator(getTable(locales[index]))
// Set options from the user-supplied tag.
c.setFromTag(t)
// Set the user-supplied options.
c.setOptions(o)
c.init()
return c
}
// NewFromTable returns a new Collator for the given Weighter.
func NewFromTable(w colltab.Weighter, o ...Option) *Collator {
c := newCollator(w)
c.setOptions(o)
c.init()
return c
}
func (c *Collator) init() {
if c.numeric {
c.t = colltab.NewNumericWeighter(c.t)
}
c._iter[0].init(c)
c._iter[1].init(c)
}
// Buffer holds keys generated by Key and KeyString.
type Buffer struct {
buf [4096]byte
key []byte
}
func (b *Buffer) init() {
if b.key == nil {
b.key = b.buf[:0]
}
}
// Reset clears the buffer from previous results generated by Key and KeyString.
func (b *Buffer) Reset() {
b.key = b.key[:0]
}
// Compare returns an integer comparing the two byte slices.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
func (c *Collator) Compare(a, b []byte) int {
// TODO: skip identical prefixes once we have a fast way to detect if a rune is
// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
c.iter(0).SetInput(a)
c.iter(1).SetInput(b)
if res := c.compare(); res != 0 {
return res
}
if !c.ignore[colltab.Identity] {
return bytes.Compare(a, b)
}
return 0
}
// CompareString returns an integer comparing the two strings.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
func (c *Collator) CompareString(a, b string) int {
// TODO: skip identical prefixes once we have a fast way to detect if a rune is
// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
c.iter(0).SetInputString(a)
c.iter(1).SetInputString(b)
if res := c.compare(); res != 0 {
return res
}
if !c.ignore[colltab.Identity] {
if a < b {
return -1
} else if a > b {
return 1
}
}
return 0
}
func compareLevel(f func(i *iter) int, a, b *iter) int {
a.pce = 0
b.pce = 0
for {
va := f(a)
vb := f(b)
if va != vb {
if va < vb {
return -1
}
return 1
} else if va == 0 {
break
}
}
return 0
}
func (c *Collator) compare() int {
ia, ib := c.iter(0), c.iter(1)
// Process primary level
if c.alternate != altShifted {
// TODO: implement script reordering
if res := compareLevel((*iter).nextPrimary, ia, ib); res != 0 {
return res
}
} else {
// TODO: handle shifted
}
if !c.ignore[colltab.Secondary] {
f := (*iter).nextSecondary
if c.backwards {
f = (*iter).prevSecondary
}
if res := compareLevel(f, ia, ib); res != 0 {
return res
}
}
// TODO: special case handling (Danish?)
if !c.ignore[colltab.Tertiary] || c.caseLevel {
if res := compareLevel((*iter).nextTertiary, ia, ib); res != 0 {
return res
}
if !c.ignore[colltab.Quaternary] {
if res := compareLevel((*iter).nextQuaternary, ia, ib); res != 0 {
return res
}
}
}
return 0
}
// Key returns the collation key for str.
// Passing the buffer buf may avoid memory allocations.
// The returned slice will point to an allocation in Buffer and will remain
// valid until the next call to buf.Reset().
func (c *Collator) Key(buf *Buffer, str []byte) []byte {
// See https://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
buf.init()
return c.key(buf, c.getColElems(str))
}
// KeyFromString returns the collation key for str.
// Passing the buffer buf may avoid memory allocations.
// The returned slice will point to an allocation in Buffer and will retain
// valid until the next call to buf.ResetKeys().
func (c *Collator) KeyFromString(buf *Buffer, str string) []byte {
// See https://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
buf.init()
return c.key(buf, c.getColElemsString(str))
}
func (c *Collator) key(buf *Buffer, w []colltab.Elem) []byte {
processWeights(c.alternate, c.t.Top(), w)
kn := len(buf.key)
c.keyFromElems(buf, w)
return buf.key[kn:]
}
func (c *Collator) getColElems(str []byte) []colltab.Elem {
i := c.iter(0)
i.SetInput(str)
for i.Next() {
}
return i.Elems
}
func (c *Collator) getColElemsString(str string) []colltab.Elem {
i := c.iter(0)
i.SetInputString(str)
for i.Next() {
}
return i.Elems
}
type iter struct {
wa [512]colltab.Elem
colltab.Iter
pce int
}
func (i *iter) init(c *Collator) {
i.Weighter = c.t
i.Elems = i.wa[:0]
}
func (i *iter) nextPrimary() int {
for {
for ; i.pce < i.N; i.pce++ {
if v := i.Elems[i.pce].Primary(); v != 0 {
i.pce++
return v
}
}
if !i.Next() {
return 0
}
}
panic("should not reach here")
}
func (i *iter) nextSecondary() int {
for ; i.pce < len(i.Elems); i.pce++ {
if v := i.Elems[i.pce].Secondary(); v != 0 {
i.pce++
return v
}
}
return 0
}
func (i *iter) prevSecondary() int {
for ; i.pce < len(i.Elems); i.pce++ {
if v := i.Elems[len(i.Elems)-i.pce-1].Secondary(); v != 0 {
i.pce++
return v
}
}
return 0
}
func (i *iter) nextTertiary() int {
for ; i.pce < len(i.Elems); i.pce++ {
if v := i.Elems[i.pce].Tertiary(); v != 0 {
i.pce++
return int(v)
}
}
return 0
}
func (i *iter) nextQuaternary() int {
for ; i.pce < len(i.Elems); i.pce++ {
if v := i.Elems[i.pce].Quaternary(); v != 0 {
i.pce++
return v
}
}
return 0
}
func appendPrimary(key []byte, p int) []byte {
// Convert to variable length encoding; supports up to 23 bits.
if p <= 0x7FFF {
key = append(key, uint8(p>>8), uint8(p))
} else {
key = append(key, uint8(p>>16)|0x80, uint8(p>>8), uint8(p))
}
return key
}
// keyFromElems converts the weights ws to a compact sequence of bytes.
// The result will be appended to the byte buffer in buf.
func (c *Collator) keyFromElems(buf *Buffer, ws []colltab.Elem) {
for _, v := range ws {
if w := v.Primary(); w > 0 {
buf.key = appendPrimary(buf.key, w)
}
}
if !c.ignore[colltab.Secondary] {
buf.key = append(buf.key, 0, 0)
// TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF.
if !c.backwards {
for _, v := range ws {
if w := v.Secondary(); w > 0 {
buf.key = append(buf.key, uint8(w>>8), uint8(w))
}
}
} else {
for i := len(ws) - 1; i >= 0; i-- {
if w := ws[i].Secondary(); w > 0 {
buf.key = append(buf.key, uint8(w>>8), uint8(w))
}
}
}
} else if c.caseLevel {
buf.key = append(buf.key, 0, 0)
}
if !c.ignore[colltab.Tertiary] || c.caseLevel {
buf.key = append(buf.key, 0, 0)
for _, v := range ws {
if w := v.Tertiary(); w > 0 {
buf.key = append(buf.key, uint8(w))
}
}
// Derive the quaternary weights from the options and other levels.
// Note that we represent MaxQuaternary as 0xFF. The first byte of the
// representation of a primary weight is always smaller than 0xFF,
// so using this single byte value will compare correctly.
if !c.ignore[colltab.Quaternary] && c.alternate >= altShifted {
if c.alternate == altShiftTrimmed {
lastNonFFFF := len(buf.key)
buf.key = append(buf.key, 0)
for _, v := range ws {
if w := v.Quaternary(); w == colltab.MaxQuaternary {
buf.key = append(buf.key, 0xFF)
} else if w > 0 {
buf.key = appendPrimary(buf.key, w)
lastNonFFFF = len(buf.key)
}
}
buf.key = buf.key[:lastNonFFFF]
} else {
buf.key = append(buf.key, 0)
for _, v := range ws {
if w := v.Quaternary(); w == colltab.MaxQuaternary {
buf.key = append(buf.key, 0xFF)
} else if w > 0 {
buf.key = appendPrimary(buf.key, w)
}
}
}
}
}
}
func processWeights(vw alternateHandling, top uint32, wa []colltab.Elem) {
ignore := false
vtop := int(top)
switch vw {
case altShifted, altShiftTrimmed:
for i := range wa {
if p := wa[i].Primary(); p <= vtop && p != 0 {
wa[i] = colltab.MakeQuaternary(p)
ignore = true
} else if p == 0 {
if ignore {
wa[i] = colltab.Ignore
}
} else {
ignore = false
}
}
case altBlanked:
for i := range wa {
if p := wa[i].Primary(); p <= vtop && (ignore || p != 0) {
wa[i] = colltab.Ignore
ignore = true
} else {
ignore = false
}
}
}
}

32
vendor/golang.org/x/text/collate/index.go generated vendored Normal file
View file

@ -0,0 +1,32 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
import "golang.org/x/text/internal/colltab"
const blockSize = 64
func getTable(t tableIndex) *colltab.Table {
return &colltab.Table{
Index: colltab.Trie{
Index0: mainLookup[:][blockSize*t.lookupOffset:],
Values0: mainValues[:][blockSize*t.valuesOffset:],
Index: mainLookup[:],
Values: mainValues[:],
},
ExpandElem: mainExpandElem[:],
ContractTries: colltab.ContractTrieSet(mainCTEntries[:]),
ContractElem: mainContractElem[:],
MaxContractLen: 18,
VariableTop: varTop,
}
}
// tableIndex holds information for constructing a table
// for a certain locale based on the main table.
type tableIndex struct {
lookupOffset uint32
valuesOffset uint32
}

553
vendor/golang.org/x/text/collate/maketables.go generated vendored Normal file
View file

@ -0,0 +1,553 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Collation table generator.
// Data read from the web.
package main
import (
"archive/zip"
"bufio"
"bytes"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"regexp"
"sort"
"strconv"
"strings"
"unicode/utf8"
"golang.org/x/text/collate"
"golang.org/x/text/collate/build"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/internal/gen"
"golang.org/x/text/language"
"golang.org/x/text/unicode/cldr"
)
var (
test = flag.Bool("test", false,
"test existing tables; can be used to compare web data with package data.")
short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
draft = flag.Bool("draft", false, `Use draft versions, when available.`)
tags = flag.String("tags", "", "build tags to be included after +build directive")
pkg = flag.String("package", "collate",
"the name of the package in which the generated file is to be included")
tables = flagStringSetAllowAll("tables", "collate", "collate,chars",
"comma-spearated list of tables to generate.")
exclude = flagStringSet("exclude", "zh2", "",
"comma-separated list of languages to exclude.")
include = flagStringSet("include", "", "",
"comma-separated list of languages to include. Include trumps exclude.")
// TODO: Not included: unihan gb2312han zhuyin big5han (for size reasons)
// TODO: Not included: traditional (buggy for Bengali)
types = flagStringSetAllowAll("types", "standard,phonebook,phonetic,reformed,pinyin,stroke", "",
"comma-separated list of types that should be included.")
)
// stringSet implements an ordered set based on a list. It implements flag.Value
// to allow a set to be specified as a comma-separated list.
type stringSet struct {
s []string
allowed *stringSet
dirty bool // needs compaction if true
all bool
allowAll bool
}
func flagStringSet(name, def, allowed, usage string) *stringSet {
ss := &stringSet{}
if allowed != "" {
usage += fmt.Sprintf(" (allowed values: any of %s)", allowed)
ss.allowed = &stringSet{}
failOnError(ss.allowed.Set(allowed))
}
ss.Set(def)
flag.Var(ss, name, usage)
return ss
}
func flagStringSetAllowAll(name, def, allowed, usage string) *stringSet {
ss := &stringSet{allowAll: true}
if allowed == "" {
flag.Var(ss, name, usage+fmt.Sprintf(` Use "all" to select all.`))
} else {
ss.allowed = &stringSet{}
failOnError(ss.allowed.Set(allowed))
flag.Var(ss, name, usage+fmt.Sprintf(` (allowed values: "all" or any of %s)`, allowed))
}
ss.Set(def)
return ss
}
func (ss stringSet) Len() int {
return len(ss.s)
}
func (ss stringSet) String() string {
return strings.Join(ss.s, ",")
}
func (ss *stringSet) Set(s string) error {
if ss.allowAll && s == "all" {
ss.s = nil
ss.all = true
return nil
}
ss.s = ss.s[:0]
for _, s := range strings.Split(s, ",") {
if s := strings.TrimSpace(s); s != "" {
if ss.allowed != nil && !ss.allowed.contains(s) {
return fmt.Errorf("unsupported value %q; must be one of %s", s, ss.allowed)
}
ss.add(s)
}
}
ss.compact()
return nil
}
func (ss *stringSet) add(s string) {
ss.s = append(ss.s, s)
ss.dirty = true
}
func (ss *stringSet) values() []string {
ss.compact()
return ss.s
}
func (ss *stringSet) contains(s string) bool {
if ss.all {
return true
}
for _, v := range ss.s {
if v == s {
return true
}
}
return false
}
func (ss *stringSet) compact() {
if !ss.dirty {
return
}
a := ss.s
sort.Strings(a)
k := 0
for i := 1; i < len(a); i++ {
if a[k] != a[i] {
a[k+1] = a[i]
k++
}
}
ss.s = a[:k+1]
ss.dirty = false
}
func skipLang(l string) bool {
if include.Len() > 0 {
return !include.contains(l)
}
return exclude.contains(l)
}
// altInclude returns a list of alternatives (for the LDML alt attribute)
// in order of preference. An empty string in this list indicates the
// default entry.
func altInclude() []string {
l := []string{}
if *short {
l = append(l, "short")
}
l = append(l, "")
// TODO: handle draft using cldr.SetDraftLevel
if *draft {
l = append(l, "proposed")
}
return l
}
func failOnError(e error) {
if e != nil {
log.Panic(e)
}
}
func openArchive() *zip.Reader {
f := gen.OpenCLDRCoreZip()
buffer, err := ioutil.ReadAll(f)
f.Close()
failOnError(err)
archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
failOnError(err)
return archive
}
// parseUCA parses a Default Unicode Collation Element Table of the format
// specified in https://www.unicode.org/reports/tr10/#File_Format.
// It returns the variable top.
func parseUCA(builder *build.Builder) {
var r io.ReadCloser
var err error
for _, f := range openArchive().File {
if strings.HasSuffix(f.Name, "allkeys_CLDR.txt") {
r, err = f.Open()
}
}
if r == nil {
log.Fatal("File allkeys_CLDR.txt not found in archive.")
}
failOnError(err)
defer r.Close()
scanner := bufio.NewScanner(r)
colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
for i := 1; scanner.Scan(); i++ {
line := scanner.Text()
if len(line) == 0 || line[0] == '#' {
continue
}
if line[0] == '@' {
// parse properties
switch {
case strings.HasPrefix(line[1:], "version "):
a := strings.Split(line[1:], " ")
if a[1] != gen.UnicodeVersion() {
log.Fatalf("incompatible version %s; want %s", a[1], gen.UnicodeVersion())
}
case strings.HasPrefix(line[1:], "backwards "):
log.Fatalf("%d: unsupported option backwards", i)
default:
log.Printf("%d: unknown option %s", i, line[1:])
}
} else {
// parse entries
part := strings.Split(line, " ; ")
if len(part) != 2 {
log.Fatalf("%d: production rule without ';': %v", i, line)
}
lhs := []rune{}
for _, v := range strings.Split(part[0], " ") {
if v == "" {
continue
}
lhs = append(lhs, rune(convHex(i, v)))
}
var n int
var vars []int
rhs := [][]int{}
for i, m := range colelem.FindAllStringSubmatch(part[1], -1) {
n += len(m[0])
elem := []int{}
for _, h := range strings.Split(m[2], ".") {
elem = append(elem, convHex(i, h))
}
if m[1] == "*" {
vars = append(vars, i)
}
rhs = append(rhs, elem)
}
if len(part[1]) < n+3 || part[1][n+1] != '#' {
log.Fatalf("%d: expected comment; found %s", i, part[1][n:])
}
if *test {
testInput.add(string(lhs))
}
failOnError(builder.Add(lhs, rhs, vars))
}
}
if scanner.Err() != nil {
log.Fatal(scanner.Err())
}
}
func convHex(line int, s string) int {
r, e := strconv.ParseInt(s, 16, 32)
if e != nil {
log.Fatalf("%d: %v", line, e)
}
return int(r)
}
var testInput = stringSet{}
var charRe = regexp.MustCompile(`&#x([0-9A-F]*);`)
var tagRe = regexp.MustCompile(`<([a-z_]*) */>`)
var mainLocales = []string{}
// charsets holds a list of exemplar characters per category.
type charSets map[string][]string
func (p charSets) fprint(w io.Writer) {
fmt.Fprintln(w, "[exN]string{")
for i, k := range []string{"", "contractions", "punctuation", "auxiliary", "currencySymbol", "index"} {
if set := p[k]; len(set) != 0 {
fmt.Fprintf(w, "\t\t%d: %q,\n", i, strings.Join(set, " "))
}
}
fmt.Fprintln(w, "\t},")
}
var localeChars = make(map[string]charSets)
const exemplarHeader = `
type exemplarType int
const (
exCharacters exemplarType = iota
exContractions
exPunctuation
exAuxiliary
exCurrency
exIndex
exN
)
`
func printExemplarCharacters(w io.Writer) {
fmt.Fprintln(w, exemplarHeader)
fmt.Fprintln(w, "var exemplarCharacters = map[string][exN]string{")
for _, loc := range mainLocales {
fmt.Fprintf(w, "\t%q: ", loc)
localeChars[loc].fprint(w)
}
fmt.Fprintln(w, "}")
}
func decodeCLDR(d *cldr.Decoder) *cldr.CLDR {
r := gen.OpenCLDRCoreZip()
data, err := d.DecodeZip(r)
failOnError(err)
return data
}
// parseMain parses XML files in the main directory of the CLDR core.zip file.
func parseMain() {
d := &cldr.Decoder{}
d.SetDirFilter("main")
d.SetSectionFilter("characters")
data := decodeCLDR(d)
for _, loc := range data.Locales() {
x := data.RawLDML(loc)
if skipLang(x.Identity.Language.Type) {
continue
}
if x.Characters != nil {
x, _ = data.LDML(loc)
loc = language.Make(loc).String()
for _, ec := range x.Characters.ExemplarCharacters {
if ec.Draft != "" {
continue
}
if _, ok := localeChars[loc]; !ok {
mainLocales = append(mainLocales, loc)
localeChars[loc] = make(charSets)
}
localeChars[loc][ec.Type] = parseCharacters(ec.Data())
}
}
}
}
func parseCharacters(chars string) []string {
parseSingle := func(s string) (r rune, tail string, escaped bool) {
if s[0] == '\\' {
return rune(s[1]), s[2:], true
}
r, sz := utf8.DecodeRuneInString(s)
return r, s[sz:], false
}
chars = strings.TrimSpace(chars)
if n := len(chars) - 1; chars[n] == ']' && chars[0] == '[' {
chars = chars[1:n]
}
list := []string{}
var r, last, end rune
for len(chars) > 0 {
if chars[0] == '{' { // character sequence
buf := []rune{}
for chars = chars[1:]; len(chars) > 0; {
r, chars, _ = parseSingle(chars)
if r == '}' {
break
}
if r == ' ' {
log.Fatalf("space not supported in sequence %q", chars)
}
buf = append(buf, r)
}
list = append(list, string(buf))
last = 0
} else { // single character
escaped := false
r, chars, escaped = parseSingle(chars)
if r != ' ' {
if r == '-' && !escaped {
if last == 0 {
log.Fatal("'-' should be preceded by a character")
}
end, chars, _ = parseSingle(chars)
for ; last <= end; last++ {
list = append(list, string(last))
}
last = 0
} else {
list = append(list, string(r))
last = r
}
}
}
}
return list
}
var fileRe = regexp.MustCompile(`.*/collation/(.*)\.xml`)
// typeMap translates legacy type keys to their BCP47 equivalent.
var typeMap = map[string]string{
"phonebook": "phonebk",
"traditional": "trad",
}
// parseCollation parses XML files in the collation directory of the CLDR core.zip file.
func parseCollation(b *build.Builder) {
d := &cldr.Decoder{}
d.SetDirFilter("collation")
data := decodeCLDR(d)
for _, loc := range data.Locales() {
x, err := data.LDML(loc)
failOnError(err)
if skipLang(x.Identity.Language.Type) {
continue
}
cs := x.Collations.Collation
sl := cldr.MakeSlice(&cs)
if len(types.s) == 0 {
sl.SelectAnyOf("type", x.Collations.Default())
} else if !types.all {
sl.SelectAnyOf("type", types.s...)
}
sl.SelectOnePerGroup("alt", altInclude())
for _, c := range cs {
id, err := language.Parse(loc)
if err != nil {
fmt.Fprintf(os.Stderr, "invalid locale: %q", err)
continue
}
// Support both old- and new-style defaults.
d := c.Type
if x.Collations.DefaultCollation == nil {
d = x.Collations.Default()
} else {
d = x.Collations.DefaultCollation.Data()
}
// We assume tables are being built either for search or collation,
// but not both. For search the default is always "search".
if d != c.Type && c.Type != "search" {
typ := c.Type
if len(c.Type) > 8 {
typ = typeMap[c.Type]
}
id, err = id.SetTypeForKey("co", typ)
failOnError(err)
}
t := b.Tailoring(id)
c.Process(processor{t})
}
}
}
type processor struct {
t *build.Tailoring
}
func (p processor) Reset(anchor string, before int) (err error) {
if before != 0 {
err = p.t.SetAnchorBefore(anchor)
} else {
err = p.t.SetAnchor(anchor)
}
failOnError(err)
return nil
}
func (p processor) Insert(level int, str, context, extend string) error {
str = context + str
if *test {
testInput.add(str)
}
// TODO: mimic bug in old maketables: remove.
err := p.t.Insert(colltab.Level(level-1), str, context+extend)
failOnError(err)
return nil
}
func (p processor) Index(id string) {
}
func testCollator(c *collate.Collator) {
c0 := collate.New(language.Und)
// iterator over all characters for all locales and check
// whether Key is equal.
buf := collate.Buffer{}
// Add all common and not too uncommon runes to the test set.
for i := rune(0); i < 0x30000; i++ {
testInput.add(string(i))
}
for i := rune(0xE0000); i < 0xF0000; i++ {
testInput.add(string(i))
}
for _, str := range testInput.values() {
k0 := c0.KeyFromString(&buf, str)
k := c.KeyFromString(&buf, str)
if !bytes.Equal(k0, k) {
failOnError(fmt.Errorf("test:%U: keys differ (%x vs %x)", []rune(str), k0, k))
}
buf.Reset()
}
fmt.Println("PASS")
}
func main() {
gen.Init()
b := build.NewBuilder()
parseUCA(b)
if tables.contains("chars") {
parseMain()
}
parseCollation(b)
c, err := b.Build()
failOnError(err)
if *test {
testCollator(collate.NewFromTable(c))
} else {
w := &bytes.Buffer{}
gen.WriteUnicodeVersion(w)
gen.WriteCLDRVersion(w)
if tables.contains("collate") {
_, err = b.Print(w)
failOnError(err)
}
if tables.contains("chars") {
printExemplarCharacters(w)
}
gen.WriteGoFile("tables.go", *pkg, w.Bytes())
}
}

239
vendor/golang.org/x/text/collate/option.go generated vendored Normal file
View file

@ -0,0 +1,239 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
import (
"sort"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/language"
"golang.org/x/text/unicode/norm"
)
// newCollator creates a new collator with default options configured.
func newCollator(t colltab.Weighter) *Collator {
// Initialize a collator with default options.
c := &Collator{
options: options{
ignore: [colltab.NumLevels]bool{
colltab.Quaternary: true,
colltab.Identity: true,
},
f: norm.NFD,
t: t,
},
}
// TODO: store vt in tags or remove.
c.variableTop = t.Top()
return c
}
// An Option is used to change the behavior of a Collator. Options override the
// settings passed through the locale identifier.
type Option struct {
priority int
f func(o *options)
}
type prioritizedOptions []Option
func (p prioritizedOptions) Len() int {
return len(p)
}
func (p prioritizedOptions) Swap(i, j int) {
p[i], p[j] = p[j], p[i]
}
func (p prioritizedOptions) Less(i, j int) bool {
return p[i].priority < p[j].priority
}
type options struct {
// ignore specifies which levels to ignore.
ignore [colltab.NumLevels]bool
// caseLevel is true if there is an additional level of case matching
// between the secondary and tertiary levels.
caseLevel bool
// backwards specifies the order of sorting at the secondary level.
// This option exists predominantly to support reverse sorting of accents in French.
backwards bool
// numeric specifies whether any sequence of decimal digits (category is Nd)
// is sorted at a primary level with its numeric value.
// For example, "A-21" < "A-123".
// This option is set by wrapping the main Weighter with NewNumericWeighter.
numeric bool
// alternate specifies an alternative handling of variables.
alternate alternateHandling
// variableTop is the largest primary value that is considered to be
// variable.
variableTop uint32
t colltab.Weighter
f norm.Form
}
func (o *options) setOptions(opts []Option) {
sort.Sort(prioritizedOptions(opts))
for _, x := range opts {
x.f(o)
}
}
// OptionsFromTag extracts the BCP47 collation options from the tag and
// configures a collator accordingly. These options are set before any other
// option.
func OptionsFromTag(t language.Tag) Option {
return Option{0, func(o *options) {
o.setFromTag(t)
}}
}
func (o *options) setFromTag(t language.Tag) {
o.caseLevel = ldmlBool(t, o.caseLevel, "kc")
o.backwards = ldmlBool(t, o.backwards, "kb")
o.numeric = ldmlBool(t, o.numeric, "kn")
// Extract settings from the BCP47 u extension.
switch t.TypeForKey("ks") { // strength
case "level1":
o.ignore[colltab.Secondary] = true
o.ignore[colltab.Tertiary] = true
case "level2":
o.ignore[colltab.Tertiary] = true
case "level3", "":
// The default.
case "level4":
o.ignore[colltab.Quaternary] = false
case "identic":
o.ignore[colltab.Quaternary] = false
o.ignore[colltab.Identity] = false
}
switch t.TypeForKey("ka") {
case "shifted":
o.alternate = altShifted
// The following two types are not official BCP47, but we support them to
// give access to this otherwise hidden functionality. The name blanked is
// derived from the LDML name blanked and posix reflects the main use of
// the shift-trimmed option.
case "blanked":
o.alternate = altBlanked
case "posix":
o.alternate = altShiftTrimmed
}
// TODO: caseFirst ("kf"), reorder ("kr"), and maybe variableTop ("vt").
// Not used:
// - normalization ("kk", not necessary for this implementation)
// - hiraganaQuatenary ("kh", obsolete)
}
func ldmlBool(t language.Tag, old bool, key string) bool {
switch t.TypeForKey(key) {
case "true":
return true
case "false":
return false
default:
return old
}
}
var (
// IgnoreCase sets case-insensitive comparison.
IgnoreCase Option = ignoreCase
ignoreCase = Option{3, ignoreCaseF}
// IgnoreDiacritics causes diacritical marks to be ignored. ("o" == "ö").
IgnoreDiacritics Option = ignoreDiacritics
ignoreDiacritics = Option{3, ignoreDiacriticsF}
// IgnoreWidth causes full-width characters to match their half-width
// equivalents.
IgnoreWidth Option = ignoreWidth
ignoreWidth = Option{2, ignoreWidthF}
// Loose sets the collator to ignore diacritics, case and width.
Loose Option = loose
loose = Option{4, looseF}
// Force ordering if strings are equivalent but not equal.
Force Option = force
force = Option{5, forceF}
// Numeric specifies that numbers should sort numerically ("2" < "12").
Numeric Option = numeric
numeric = Option{5, numericF}
)
func ignoreWidthF(o *options) {
o.ignore[colltab.Tertiary] = true
o.caseLevel = true
}
func ignoreDiacriticsF(o *options) {
o.ignore[colltab.Secondary] = true
}
func ignoreCaseF(o *options) {
o.ignore[colltab.Tertiary] = true
o.caseLevel = false
}
func looseF(o *options) {
ignoreWidthF(o)
ignoreDiacriticsF(o)
ignoreCaseF(o)
}
func forceF(o *options) {
o.ignore[colltab.Identity] = false
}
func numericF(o *options) { o.numeric = true }
// Reorder overrides the pre-defined ordering of scripts and character sets.
func Reorder(s ...string) Option {
// TODO: need fractional weights to implement this.
panic("TODO: implement")
}
// TODO: consider making these public again. These options cannot be fully
// specified in BCP47, so an API interface seems warranted. Still a higher-level
// interface would be nice (e.g. a POSIX option for enabling altShiftTrimmed)
// alternateHandling identifies the various ways in which variables are handled.
// A rune with a primary weight lower than the variable top is considered a
// variable.
// See https://www.unicode.org/reports/tr10/#Variable_Weighting for details.
type alternateHandling int
const (
// altNonIgnorable turns off special handling of variables.
altNonIgnorable alternateHandling = iota
// altBlanked sets variables and all subsequent primary ignorables to be
// ignorable at all levels. This is identical to removing all variables
// and subsequent primary ignorables from the input.
altBlanked
// altShifted sets variables to be ignorable for levels one through three and
// adds a fourth level based on the values of the ignored levels.
altShifted
// altShiftTrimmed is a slight variant of altShifted that is used to
// emulate POSIX.
altShiftTrimmed
)

81
vendor/golang.org/x/text/collate/sort.go generated vendored Normal file
View file

@ -0,0 +1,81 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
import (
"bytes"
"sort"
)
const (
maxSortBuffer = 40960
maxSortEntries = 4096
)
type swapper interface {
Swap(i, j int)
}
type sorter struct {
buf *Buffer
keys [][]byte
src swapper
}
func (s *sorter) init(n int) {
if s.buf == nil {
s.buf = &Buffer{}
s.buf.init()
}
if cap(s.keys) < n {
s.keys = make([][]byte, n)
}
s.keys = s.keys[0:n]
}
func (s *sorter) sort(src swapper) {
s.src = src
sort.Sort(s)
}
func (s sorter) Len() int {
return len(s.keys)
}
func (s sorter) Less(i, j int) bool {
return bytes.Compare(s.keys[i], s.keys[j]) == -1
}
func (s sorter) Swap(i, j int) {
s.keys[i], s.keys[j] = s.keys[j], s.keys[i]
s.src.Swap(i, j)
}
// A Lister can be sorted by Collator's Sort method.
type Lister interface {
Len() int
Swap(i, j int)
// Bytes returns the bytes of the text at index i.
Bytes(i int) []byte
}
// Sort uses sort.Sort to sort the strings represented by x using the rules of c.
func (c *Collator) Sort(x Lister) {
n := x.Len()
c.sorter.init(n)
for i := 0; i < n; i++ {
c.sorter.keys[i] = c.Key(c.sorter.buf, x.Bytes(i))
}
c.sorter.sort(x)
}
// SortStrings uses sort.Sort to sort the strings in x using the rules of c.
func (c *Collator) SortStrings(x []string) {
c.sorter.init(len(x))
for i, s := range x {
c.sorter.keys[i] = c.KeyFromString(c.sorter.buf, s)
}
c.sorter.sort(sort.StringSlice(x))
}

73789
vendor/golang.org/x/text/collate/tables.go generated vendored Normal file

File diff suppressed because it is too large Load diff

371
vendor/golang.org/x/text/internal/colltab/collelem.go generated vendored Normal file
View file

@ -0,0 +1,371 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"fmt"
"unicode"
)
// Level identifies the collation comparison level.
// The primary level corresponds to the basic sorting of text.
// The secondary level corresponds to accents and related linguistic elements.
// The tertiary level corresponds to casing and related concepts.
// The quaternary level is derived from the other levels by the
// various algorithms for handling variable elements.
type Level int
const (
Primary Level = iota
Secondary
Tertiary
Quaternary
Identity
NumLevels
)
const (
defaultSecondary = 0x20
defaultTertiary = 0x2
maxTertiary = 0x1F
MaxQuaternary = 0x1FFFFF // 21 bits.
)
// Elem is a representation of a collation element. This API provides ways to encode
// and decode Elems. Implementations of collation tables may use values greater
// or equal to PrivateUse for their own purposes. However, these should never be
// returned by AppendNext.
type Elem uint32
const (
maxCE Elem = 0xAFFFFFFF
PrivateUse = minContract
minContract = 0xC0000000
maxContract = 0xDFFFFFFF
minExpand = 0xE0000000
maxExpand = 0xEFFFFFFF
minDecomp = 0xF0000000
)
type ceType int
const (
ceNormal ceType = iota // ceNormal includes implicits (ce == 0)
ceContractionIndex // rune can be a start of a contraction
ceExpansionIndex // rune expands into a sequence of collation elements
ceDecompose // rune expands using NFKC decomposition
)
func (ce Elem) ctype() ceType {
if ce <= maxCE {
return ceNormal
}
if ce <= maxContract {
return ceContractionIndex
} else {
if ce <= maxExpand {
return ceExpansionIndex
}
return ceDecompose
}
panic("should not reach here")
return ceType(-1)
}
// For normal collation elements, we assume that a collation element either has
// a primary or non-default secondary value, not both.
// Collation elements with a primary value are of the form
// 01pppppp pppppppp ppppppp0 ssssssss
// - p* is primary collation value
// - s* is the secondary collation value
// 00pppppp pppppppp ppppppps sssttttt, where
// - p* is primary collation value
// - s* offset of secondary from default value.
// - t* is the tertiary collation value
// 100ttttt cccccccc pppppppp pppppppp
// - t* is the tertiar collation value
// - c* is the canonical combining class
// - p* is the primary collation value
// Collation elements with a secondary value are of the form
// 1010cccc ccccssss ssssssss tttttttt, where
// - c* is the canonical combining class
// - s* is the secondary collation value
// - t* is the tertiary collation value
// 11qqqqqq qqqqqqqq qqqqqqq0 00000000
// - q* quaternary value
const (
ceTypeMask = 0xC0000000
ceTypeMaskExt = 0xE0000000
ceIgnoreMask = 0xF00FFFFF
ceType1 = 0x40000000
ceType2 = 0x00000000
ceType3or4 = 0x80000000
ceType4 = 0xA0000000
ceTypeQ = 0xC0000000
Ignore = ceType4
firstNonPrimary = 0x80000000
lastSpecialPrimary = 0xA0000000
secondaryMask = 0x80000000
hasTertiaryMask = 0x40000000
primaryValueMask = 0x3FFFFE00
maxPrimaryBits = 21
compactPrimaryBits = 16
maxSecondaryBits = 12
maxTertiaryBits = 8
maxCCCBits = 8
maxSecondaryCompactBits = 8
maxSecondaryDiffBits = 4
maxTertiaryCompactBits = 5
primaryShift = 9
compactSecondaryShift = 5
minCompactSecondary = defaultSecondary - 4
)
func makeImplicitCE(primary int) Elem {
return ceType1 | Elem(primary<<primaryShift) | defaultSecondary
}
// MakeElem returns an Elem for the given values. It will return an error
// if the given combination of values is invalid.
func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
if w := primary; w >= 1<<maxPrimaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
}
if w := secondary; w >= 1<<maxSecondaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits)
}
if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits)
}
ce := Elem(0)
if primary != 0 {
if ccc != 0 {
if primary >= 1<<compactPrimaryBits {
return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits)
}
if secondary != defaultSecondary {
return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc)
}
ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits))
ce |= Elem(ccc) << compactPrimaryBits
ce |= Elem(primary)
ce |= ceType3or4
} else if tertiary == defaultTertiary {
if secondary >= 1<<maxSecondaryCompactBits {
return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits)
}
ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary)
ce |= ceType1
} else {
d := secondary - defaultSecondary + maxSecondaryDiffBits
if d >= 1<<maxSecondaryDiffBits || d < 0 {
return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
}
if tertiary >= 1<<maxTertiaryCompactBits {
return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits)
}
ce = Elem(primary<<maxSecondaryDiffBits + d)
ce = ce<<maxTertiaryCompactBits + Elem(tertiary)
}
} else {
ce = Elem(secondary<<maxTertiaryBits + tertiary)
ce += Elem(ccc) << (maxSecondaryBits + maxTertiaryBits)
ce |= ceType4
}
return ce, nil
}
// MakeQuaternary returns an Elem with the given quaternary value.
func MakeQuaternary(v int) Elem {
return ceTypeQ | Elem(v<<primaryShift)
}
// Mask sets weights for any level smaller than l to 0.
// The resulting Elem can be used to test for equality with
// other Elems to which the same mask has been applied.
func (ce Elem) Mask(l Level) uint32 {
return 0
}
// CCC returns the canonical combining class associated with the underlying character,
// if applicable, or 0 otherwise.
func (ce Elem) CCC() uint8 {
if ce&ceType3or4 != 0 {
if ce&ceType4 == ceType3or4 {
return uint8(ce >> 16)
}
return uint8(ce >> 20)
}
return 0
}
// Primary returns the primary collation weight for ce.
func (ce Elem) Primary() int {
if ce >= firstNonPrimary {
if ce > lastSpecialPrimary {
return 0
}
return int(uint16(ce))
}
return int(ce&primaryValueMask) >> primaryShift
}
// Secondary returns the secondary collation weight for ce.
func (ce Elem) Secondary() int {
switch ce & ceTypeMask {
case ceType1:
return int(uint8(ce))
case ceType2:
return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF)
case ceType3or4:
if ce < ceType4 {
return defaultSecondary
}
return int(ce>>8) & 0xFFF
case ceTypeQ:
return 0
}
panic("should not reach here")
}
// Tertiary returns the tertiary collation weight for ce.
func (ce Elem) Tertiary() uint8 {
if ce&hasTertiaryMask == 0 {
if ce&ceType3or4 == 0 {
return uint8(ce & 0x1F)
}
if ce&ceType4 == ceType4 {
return uint8(ce)
}
return uint8(ce>>24) & 0x1F // type 2
} else if ce&ceTypeMask == ceType1 {
return defaultTertiary
}
// ce is a quaternary value.
return 0
}
func (ce Elem) updateTertiary(t uint8) Elem {
if ce&ceTypeMask == ceType1 {
// convert to type 4
nce := ce & primaryValueMask
nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
ce = nce
} else if ce&ceTypeMaskExt == ceType3or4 {
ce &= ^Elem(maxTertiary << 24)
return ce | (Elem(t) << 24)
} else {
// type 2 or 4
ce &= ^Elem(maxTertiary)
}
return ce | Elem(t)
}
// Quaternary returns the quaternary value if explicitly specified,
// 0 if ce == Ignore, or MaxQuaternary otherwise.
// Quaternary values are used only for shifted variants.
func (ce Elem) Quaternary() int {
if ce&ceTypeMask == ceTypeQ {
return int(ce&primaryValueMask) >> primaryShift
} else if ce&ceIgnoreMask == Ignore {
return 0
}
return MaxQuaternary
}
// Weight returns the collation weight for the given level.
func (ce Elem) Weight(l Level) int {
switch l {
case Primary:
return ce.Primary()
case Secondary:
return ce.Secondary()
case Tertiary:
return int(ce.Tertiary())
case Quaternary:
return ce.Quaternary()
}
return 0 // return 0 (ignore) for undefined levels.
}
// For contractions, collation elements are of the form
// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
// - n* is the size of the first node in the contraction trie.
// - i* is the index of the first node in the contraction trie.
// - b* is the offset into the contraction collation element table.
// See contract.go for details on the contraction trie.
const (
maxNBits = 4
maxTrieIndexBits = 12
maxContractOffsetBits = 13
)
func splitContractIndex(ce Elem) (index, n, offset int) {
n = int(ce & (1<<maxNBits - 1))
ce >>= maxNBits
index = int(ce & (1<<maxTrieIndexBits - 1))
ce >>= maxTrieIndexBits
offset = int(ce & (1<<maxContractOffsetBits - 1))
return
}
// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
// where b* is the index into the expansion sequence table.
const maxExpandIndexBits = 16
func splitExpandIndex(ce Elem) (index int) {
return int(uint16(ce))
}
// Some runes can be expanded using NFKD decomposition. Instead of storing the full
// sequence of collation elements, we decompose the rune and lookup the collation
// elements for each rune in the decomposition and modify the tertiary weights.
// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
// - v* is the replacement tertiary weight for the first rune,
// - w* is the replacement tertiary weight for the second rune,
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
// See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
func splitDecompose(ce Elem) (t1, t2 uint8) {
return uint8(ce), uint8(ce >> 8)
}
const (
// These constants were taken from https://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
minUnified rune = 0x4E00
maxUnified = 0x9FFF
minCompatibility = 0xF900
maxCompatibility = 0xFAFF
minRare = 0x3400
maxRare = 0x4DBF
)
const (
commonUnifiedOffset = 0x10000
rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF
otherOffset = 0x50000 // largest rune in rare is U+2FA1D
illegalOffset = otherOffset + int(unicode.MaxRune)
maxPrimary = illegalOffset + 1
)
// implicitPrimary returns the primary weight for the a rune
// for which there is no entry for the rune in the collation table.
// We take a different approach from the one specified in
// https://unicode.org/reports/tr10/#Implicit_Weights,
// but preserve the resulting relative ordering of the runes.
func implicitPrimary(r rune) int {
if unicode.Is(unicode.Ideographic, r) {
if r >= minUnified && r <= maxUnified {
// The most common case for CJK.
return int(r) + commonUnifiedOffset
}
if r >= minCompatibility && r <= maxCompatibility {
// This will typically not hit. The DUCET explicitly specifies mappings
// for all characters that do not decompose.
return int(r) + commonUnifiedOffset
}
return int(r) + rareUnifiedOffset
}
return int(r) + otherOffset
}

105
vendor/golang.org/x/text/internal/colltab/colltab.go generated vendored Normal file
View file

@ -0,0 +1,105 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package colltab contains functionality related to collation tables.
// It is only to be used by the collate and search packages.
package colltab // import "golang.org/x/text/internal/colltab"
import (
"sort"
"golang.org/x/text/language"
)
// MatchLang finds the index of t in tags, using a matching algorithm used for
// collation and search. tags[0] must be language.Und, the remaining tags should
// be sorted alphabetically.
//
// Language matching for collation and search is different from the matching
// defined by language.Matcher: the (inferred) base language must be an exact
// match for the relevant fields. For example, "gsw" should not match "de".
// Also the parent relation is different, as a parent may have a different
// script. So usually the parent of zh-Hant is und, whereas for MatchLang it is
// zh.
func MatchLang(t language.Tag, tags []language.Tag) int {
// Canonicalize the values, including collapsing macro languages.
t, _ = language.All.Canonicalize(t)
base, conf := t.Base()
// Estimate the base language, but only use high-confidence values.
if conf < language.High {
// The root locale supports "search" and "standard". We assume that any
// implementation will only use one of both.
return 0
}
// Maximize base and script and normalize the tag.
if _, s, r := t.Raw(); (r != language.Region{}) {
p, _ := language.Raw.Compose(base, s, r)
// Taking the parent forces the script to be maximized.
p = p.Parent()
// Add back region and extensions.
t, _ = language.Raw.Compose(p, r, t.Extensions())
} else {
// Set the maximized base language.
t, _ = language.Raw.Compose(base, s, t.Extensions())
}
// Find start index of the language tag.
start := 1 + sort.Search(len(tags)-1, func(i int) bool {
b, _, _ := tags[i+1].Raw()
return base.String() <= b.String()
})
if start < len(tags) {
if b, _, _ := tags[start].Raw(); b != base {
return 0
}
}
// Besides the base language, script and region, only the collation type and
// the custom variant defined in the 'u' extension are used to distinguish a
// locale.
// Strip all variants and extensions and add back the custom variant.
tdef, _ := language.Raw.Compose(t.Raw())
tdef, _ = tdef.SetTypeForKey("va", t.TypeForKey("va"))
// First search for a specialized collation type, if present.
try := []language.Tag{tdef}
if co := t.TypeForKey("co"); co != "" {
tco, _ := tdef.SetTypeForKey("co", co)
try = []language.Tag{tco, tdef}
}
for _, tx := range try {
for ; tx != language.Und; tx = parent(tx) {
for i, t := range tags[start:] {
if b, _, _ := t.Raw(); b != base {
break
}
if tx == t {
return start + i
}
}
}
}
return 0
}
// parent computes the structural parent. This means inheritance may change
// script. So, unlike the CLDR parent, parent(zh-Hant) == zh.
func parent(t language.Tag) language.Tag {
if t.TypeForKey("va") != "" {
t, _ = t.SetTypeForKey("va", "")
return t
}
result := language.Und
if b, s, r := t.Raw(); (r != language.Region{}) {
result, _ = language.Raw.Compose(b, s, t.Extensions())
} else if (s != language.Script{}) {
result, _ = language.Raw.Compose(b, t.Extensions())
} else if (b != language.Base{}) {
result, _ = language.Raw.Compose(t.Extensions())
}
return result
}

145
vendor/golang.org/x/text/internal/colltab/contract.go generated vendored Normal file
View file

@ -0,0 +1,145 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import "unicode/utf8"
// For a description of ContractTrieSet, see text/collate/build/contract.go.
type ContractTrieSet []struct{ L, H, N, I uint8 }
// ctScanner is used to match a trie to an input sequence.
// A contraction may match a non-contiguous sequence of bytes in an input string.
// For example, if there is a contraction for <a, combining_ring>, it should match
// the sequence <a, combining_cedilla, combining_ring>, as combining_cedilla does
// not block combining_ring.
// ctScanner does not automatically skip over non-blocking non-starters, but rather
// retains the state of the last match and leaves it up to the user to continue
// the match at the appropriate points.
type ctScanner struct {
states ContractTrieSet
s []byte
n int
index int
pindex int
done bool
}
type ctScannerString struct {
states ContractTrieSet
s string
n int
index int
pindex int
done bool
}
func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner {
return ctScanner{s: b, states: t[index:], n: n}
}
func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString {
return ctScannerString{s: str, states: t[index:], n: n}
}
// result returns the offset i and bytes consumed p so far. If no suffix
// matched, i and p will be 0.
func (s *ctScanner) result() (i, p int) {
return s.index, s.pindex
}
func (s *ctScannerString) result() (i, p int) {
return s.index, s.pindex
}
const (
final = 0
noIndex = 0xFF
)
// scan matches the longest suffix at the current location in the input
// and returns the number of bytes consumed.
func (s *ctScanner) scan(p int) int {
pr := p // the p at the rune start
str := s.s
states, n := s.states, s.n
for i := 0; i < n && p < len(str); {
e := states[i]
c := str[p]
// TODO: a significant number of contractions are of a form that
// cannot match discontiguous UTF-8 in a normalized string. We could let
// a negative value of e.n mean that we can set s.done = true and avoid
// the need for additional matches.
if c >= e.L {
if e.L == c {
p++
if e.I != noIndex {
s.index = int(e.I)
s.pindex = p
}
if e.N != final {
i, states, n = 0, states[int(e.H)+n:], int(e.N)
if p >= len(str) || utf8.RuneStart(str[p]) {
s.states, s.n, pr = states, n, p
}
} else {
s.done = true
return p
}
continue
} else if e.N == final && c <= e.H {
p++
s.done = true
s.index = int(c-e.L) + int(e.I)
s.pindex = p
return p
}
}
i++
}
return pr
}
// scan is a verbatim copy of ctScanner.scan.
func (s *ctScannerString) scan(p int) int {
pr := p // the p at the rune start
str := s.s
states, n := s.states, s.n
for i := 0; i < n && p < len(str); {
e := states[i]
c := str[p]
// TODO: a significant number of contractions are of a form that
// cannot match discontiguous UTF-8 in a normalized string. We could let
// a negative value of e.n mean that we can set s.done = true and avoid
// the need for additional matches.
if c >= e.L {
if e.L == c {
p++
if e.I != noIndex {
s.index = int(e.I)
s.pindex = p
}
if e.N != final {
i, states, n = 0, states[int(e.H)+n:], int(e.N)
if p >= len(str) || utf8.RuneStart(str[p]) {
s.states, s.n, pr = states, n, p
}
} else {
s.done = true
return p
}
continue
} else if e.N == final && c <= e.H {
p++
s.done = true
s.index = int(c-e.L) + int(e.I)
s.pindex = p
return p
}
}
i++
}
return pr
}

178
vendor/golang.org/x/text/internal/colltab/iter.go generated vendored Normal file
View file

@ -0,0 +1,178 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
// An Iter incrementally converts chunks of the input text to collation
// elements, while ensuring that the collation elements are in normalized order
// (that is, they are in the order as if the input text were normalized first).
type Iter struct {
Weighter Weighter
Elems []Elem
// N is the number of elements in Elems that will not be reordered on
// subsequent iterations, N <= len(Elems).
N int
bytes []byte
str string
// Because the Elems buffer may contain collation elements that are needed
// for look-ahead, we need two positions in the text (bytes or str): one for
// the end position in the text for the current iteration and one for the
// start of the next call to appendNext.
pEnd int // end position in text corresponding to N.
pNext int // pEnd <= pNext.
}
// Reset sets the position in the current input text to p and discards any
// results obtained so far.
func (i *Iter) Reset(p int) {
i.Elems = i.Elems[:0]
i.N = 0
i.pEnd = p
i.pNext = p
}
// Len returns the length of the input text.
func (i *Iter) Len() int {
if i.bytes != nil {
return len(i.bytes)
}
return len(i.str)
}
// Discard removes the collation elements up to N.
func (i *Iter) Discard() {
// TODO: change this such that only modifiers following starters will have
// to be copied.
i.Elems = i.Elems[:copy(i.Elems, i.Elems[i.N:])]
i.N = 0
}
// End returns the end position of the input text for which Next has returned
// results.
func (i *Iter) End() int {
return i.pEnd
}
// SetInput resets i to input s.
func (i *Iter) SetInput(s []byte) {
i.bytes = s
i.str = ""
i.Reset(0)
}
// SetInputString resets i to input s.
func (i *Iter) SetInputString(s string) {
i.str = s
i.bytes = nil
i.Reset(0)
}
func (i *Iter) done() bool {
return i.pNext >= len(i.str) && i.pNext >= len(i.bytes)
}
func (i *Iter) appendNext() bool {
if i.done() {
return false
}
var sz int
if i.bytes == nil {
i.Elems, sz = i.Weighter.AppendNextString(i.Elems, i.str[i.pNext:])
} else {
i.Elems, sz = i.Weighter.AppendNext(i.Elems, i.bytes[i.pNext:])
}
if sz == 0 {
sz = 1
}
i.pNext += sz
return true
}
// Next appends Elems to the internal array. On each iteration, it will either
// add starters or modifiers. In the majority of cases, an Elem with a primary
// value > 0 will have a CCC of 0. The CCC values of collation elements are also
// used to detect if the input string was not normalized and to adjust the
// result accordingly.
func (i *Iter) Next() bool {
if i.N == len(i.Elems) && !i.appendNext() {
return false
}
// Check if the current segment starts with a starter.
prevCCC := i.Elems[len(i.Elems)-1].CCC()
if prevCCC == 0 {
i.N = len(i.Elems)
i.pEnd = i.pNext
return true
} else if i.Elems[i.N].CCC() == 0 {
// set i.N to only cover part of i.Elems for which prevCCC == 0 and
// use rest for the next call to next.
for i.N++; i.N < len(i.Elems) && i.Elems[i.N].CCC() == 0; i.N++ {
}
i.pEnd = i.pNext
return true
}
// The current (partial) segment starts with modifiers. We need to collect
// all successive modifiers to ensure that they are normalized.
for {
p := len(i.Elems)
i.pEnd = i.pNext
if !i.appendNext() {
break
}
if ccc := i.Elems[p].CCC(); ccc == 0 || len(i.Elems)-i.N > maxCombiningCharacters {
// Leave the starter for the next iteration. This ensures that we
// do not return sequences of collation elements that cross two
// segments.
//
// TODO: handle large number of combining characters by fully
// normalizing the input segment before iteration. This ensures
// results are consistent across the text repo.
i.N = p
return true
} else if ccc < prevCCC {
i.doNorm(p, ccc) // should be rare, never occurs for NFD and FCC.
} else {
prevCCC = ccc
}
}
done := len(i.Elems) != i.N
i.N = len(i.Elems)
return done
}
// nextNoNorm is the same as next, but does not "normalize" the collation
// elements.
func (i *Iter) nextNoNorm() bool {
// TODO: remove this function. Using this instead of next does not seem
// to improve performance in any significant way. We retain this until
// later for evaluation purposes.
if i.done() {
return false
}
i.appendNext()
i.N = len(i.Elems)
return true
}
const maxCombiningCharacters = 30
// doNorm reorders the collation elements in i.Elems.
// It assumes that blocks of collation elements added with appendNext
// either start and end with the same CCC or start with CCC == 0.
// This allows for a single insertion point for the entire block.
// The correctness of this assumption is verified in builder.go.
func (i *Iter) doNorm(p int, ccc uint8) {
n := len(i.Elems)
k := p
for p--; p > i.N && ccc < i.Elems[p-1].CCC(); p-- {
}
i.Elems = append(i.Elems, i.Elems[p:k]...)
copy(i.Elems[p:], i.Elems[k:])
i.Elems = i.Elems[:n]
}

236
vendor/golang.org/x/text/internal/colltab/numeric.go generated vendored Normal file
View file

@ -0,0 +1,236 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"unicode"
"unicode/utf8"
)
// NewNumericWeighter wraps w to replace individual digits to sort based on their
// numeric value.
//
// Weighter w must have a free primary weight after the primary weight for 9.
// If this is not the case, numeric value will sort at the same primary level
// as the first primary sorting after 9.
func NewNumericWeighter(w Weighter) Weighter {
getElem := func(s string) Elem {
elems, _ := w.AppendNextString(nil, s)
return elems[0]
}
nine := getElem("9")
// Numbers should order before zero, but the DUCET has no room for this.
// TODO: move before zero once we use fractional collation elements.
ns, _ := MakeElem(nine.Primary()+1, nine.Secondary(), int(nine.Tertiary()), 0)
return &numericWeighter{
Weighter: w,
// We assume that w sorts digits of different kinds in order of numeric
// value and that the tertiary weight order is preserved.
//
// TODO: evaluate whether it is worth basing the ranges on the Elem
// encoding itself once the move to fractional weights is complete.
zero: getElem("0"),
zeroSpecialLo: getElem(""), // U+FF10 FULLWIDTH DIGIT ZERO
zeroSpecialHi: getElem("₀"), // U+2080 SUBSCRIPT ZERO
nine: nine,
nineSpecialHi: getElem("₉"), // U+2089 SUBSCRIPT NINE
numberStart: ns,
}
}
// A numericWeighter translates a stream of digits into a stream of weights
// representing the numeric value.
type numericWeighter struct {
Weighter
// The Elems below all demarcate boundaries of specific ranges. With the
// current element encoding digits are in two ranges: normal (default
// tertiary value) and special. For most languages, digits have collation
// elements in the normal range.
//
// Note: the range tests are very specific for the element encoding used by
// this implementation. The tests in collate_test.go are designed to fail
// if this code is not updated when an encoding has changed.
zero Elem // normal digit zero
zeroSpecialLo Elem // special digit zero, low tertiary value
zeroSpecialHi Elem // special digit zero, high tertiary value
nine Elem // normal digit nine
nineSpecialHi Elem // special digit nine
numberStart Elem
}
// AppendNext calls the namesake of the underlying weigher, but replaces single
// digits with weights representing their value.
func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) {
ce, n = nw.Weighter.AppendNext(buf, s)
nc := numberConverter{
elems: buf,
w: nw,
b: s,
}
isZero, ok := nc.checkNextDigit(ce)
if !ok {
return ce, n
}
// ce might have been grown already, so take it instead of buf.
nc.init(ce, len(buf), isZero)
for n < len(s) {
ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:])
nc.b = s
n += sz
if !nc.update(ce) {
break
}
}
return nc.result(), n
}
// AppendNextString calls the namesake of the underlying weigher, but replaces
// single digits with weights representing their value.
func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n int) {
ce, n = nw.Weighter.AppendNextString(buf, s)
nc := numberConverter{
elems: buf,
w: nw,
s: s,
}
isZero, ok := nc.checkNextDigit(ce)
if !ok {
return ce, n
}
nc.init(ce, len(buf), isZero)
for n < len(s) {
ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:])
nc.s = s
n += sz
if !nc.update(ce) {
break
}
}
return nc.result(), n
}
type numberConverter struct {
w *numericWeighter
elems []Elem
nDigits int
lenIndex int
s string // set if the input was of type string
b []byte // set if the input was of type []byte
}
// init completes initialization of a numberConverter and prepares it for adding
// more digits. elems is assumed to have a digit starting at oldLen.
func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) {
// Insert a marker indicating the start of a number and a placeholder
// for the number of digits.
if isZero {
elems = append(elems[:oldLen], nc.w.numberStart, 0)
} else {
elems = append(elems, 0, 0)
copy(elems[oldLen+2:], elems[oldLen:])
elems[oldLen] = nc.w.numberStart
elems[oldLen+1] = 0
nc.nDigits = 1
}
nc.elems = elems
nc.lenIndex = oldLen + 1
}
// checkNextDigit reports whether bufNew adds a single digit relative to the old
// buffer. If it does, it also reports whether this digit is zero.
func (nc *numberConverter) checkNextDigit(bufNew []Elem) (isZero, ok bool) {
if len(nc.elems) >= len(bufNew) {
return false, false
}
e := bufNew[len(nc.elems)]
if e < nc.w.zeroSpecialLo || nc.w.nine < e {
// Not a number.
return false, false
}
if e < nc.w.zero {
if e > nc.w.nineSpecialHi {
// Not a number.
return false, false
}
if !nc.isDigit() {
return false, false
}
isZero = e <= nc.w.zeroSpecialHi
} else {
// This is the common case if we encounter a digit.
isZero = e == nc.w.zero
}
// Test the remaining added collation elements have a zero primary value.
if n := len(bufNew) - len(nc.elems); n > 1 {
for i := len(nc.elems) + 1; i < len(bufNew); i++ {
if bufNew[i].Primary() != 0 {
return false, false
}
}
// In some rare cases, collation elements will encode runes in
// unicode.No as a digit. For example Ethiopic digits (U+1369 - U+1371)
// are not in Nd. Also some digits that clearly belong in unicode.No,
// like U+0C78 TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR, have
// collation elements indistinguishable from normal digits.
// Unfortunately, this means we need to make this check for nearly all
// non-Latin digits.
//
// TODO: check the performance impact and find something better if it is
// an issue.
if !nc.isDigit() {
return false, false
}
}
return isZero, true
}
func (nc *numberConverter) isDigit() bool {
if nc.b != nil {
r, _ := utf8.DecodeRune(nc.b)
return unicode.In(r, unicode.Nd)
}
r, _ := utf8.DecodeRuneInString(nc.s)
return unicode.In(r, unicode.Nd)
}
// We currently support a maximum of about 2M digits (the number of primary
// values). Such numbers will compare correctly against small numbers, but their
// comparison against other large numbers is undefined.
//
// TODO: define a proper fallback, such as comparing large numbers textually or
// actually allowing numbers of unlimited length.
//
// TODO: cap this to a lower number (like 100) and maybe allow a larger number
// in an option?
const maxDigits = 1<<maxPrimaryBits - 1
func (nc *numberConverter) update(elems []Elem) bool {
isZero, ok := nc.checkNextDigit(elems)
if nc.nDigits == 0 && isZero {
return true
}
nc.elems = elems
if !ok {
return false
}
nc.nDigits++
return nc.nDigits < maxDigits
}
// result fills in the length element for the digit sequence and returns the
// completed collation elements.
func (nc *numberConverter) result() []Elem {
e, _ := MakeElem(nc.nDigits, defaultSecondary, defaultTertiary, 0)
nc.elems[nc.lenIndex] = e
return nc.elems
}

275
vendor/golang.org/x/text/internal/colltab/table.go generated vendored Normal file
View file

@ -0,0 +1,275 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"unicode/utf8"
"golang.org/x/text/unicode/norm"
)
// Table holds all collation data for a given collation ordering.
type Table struct {
Index Trie // main trie
// expansion info
ExpandElem []uint32
// contraction info
ContractTries ContractTrieSet
ContractElem []uint32
MaxContractLen int
VariableTop uint32
}
func (t *Table) AppendNext(w []Elem, b []byte) (res []Elem, n int) {
return t.appendNext(w, source{bytes: b})
}
func (t *Table) AppendNextString(w []Elem, s string) (res []Elem, n int) {
return t.appendNext(w, source{str: s})
}
func (t *Table) Start(p int, b []byte) int {
// TODO: implement
panic("not implemented")
}
func (t *Table) StartString(p int, s string) int {
// TODO: implement
panic("not implemented")
}
func (t *Table) Domain() []string {
// TODO: implement
panic("not implemented")
}
func (t *Table) Top() uint32 {
return t.VariableTop
}
type source struct {
str string
bytes []byte
}
func (src *source) lookup(t *Table) (ce Elem, sz int) {
if src.bytes == nil {
return t.Index.lookupString(src.str)
}
return t.Index.lookup(src.bytes)
}
func (src *source) tail(sz int) {
if src.bytes == nil {
src.str = src.str[sz:]
} else {
src.bytes = src.bytes[sz:]
}
}
func (src *source) nfd(buf []byte, end int) []byte {
if src.bytes == nil {
return norm.NFD.AppendString(buf[:0], src.str[:end])
}
return norm.NFD.Append(buf[:0], src.bytes[:end]...)
}
func (src *source) rune() (r rune, sz int) {
if src.bytes == nil {
return utf8.DecodeRuneInString(src.str)
}
return utf8.DecodeRune(src.bytes)
}
func (src *source) properties(f norm.Form) norm.Properties {
if src.bytes == nil {
return f.PropertiesString(src.str)
}
return f.Properties(src.bytes)
}
// appendNext appends the weights corresponding to the next rune or
// contraction in s. If a contraction is matched to a discontinuous
// sequence of runes, the weights for the interstitial runes are
// appended as well. It returns a new slice that includes the appended
// weights and the number of bytes consumed from s.
func (t *Table) appendNext(w []Elem, src source) (res []Elem, n int) {
ce, sz := src.lookup(t)
tp := ce.ctype()
if tp == ceNormal {
if ce == 0 {
r, _ := src.rune()
const (
hangulSize = 3
firstHangul = 0xAC00
lastHangul = 0xD7A3
)
if r >= firstHangul && r <= lastHangul {
// TODO: performance can be considerably improved here.
n = sz
var buf [16]byte // Used for decomposing Hangul.
for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] {
ce, sz = t.Index.lookup(b)
w = append(w, ce)
}
return w, n
}
ce = makeImplicitCE(implicitPrimary(r))
}
w = append(w, ce)
} else if tp == ceExpansionIndex {
w = t.appendExpansion(w, ce)
} else if tp == ceContractionIndex {
n := 0
src.tail(sz)
if src.bytes == nil {
w, n = t.matchContractionString(w, ce, src.str)
} else {
w, n = t.matchContraction(w, ce, src.bytes)
}
sz += n
} else if tp == ceDecompose {
// Decompose using NFKD and replace tertiary weights.
t1, t2 := splitDecompose(ce)
i := len(w)
nfkd := src.properties(norm.NFKD).Decomposition()
for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] {
w, p = t.appendNext(w, source{bytes: nfkd})
}
w[i] = w[i].updateTertiary(t1)
if i++; i < len(w) {
w[i] = w[i].updateTertiary(t2)
for i++; i < len(w); i++ {
w[i] = w[i].updateTertiary(maxTertiary)
}
}
}
return w, sz
}
func (t *Table) appendExpansion(w []Elem, ce Elem) []Elem {
i := splitExpandIndex(ce)
n := int(t.ExpandElem[i])
i++
for _, ce := range t.ExpandElem[i : i+n] {
w = append(w, Elem(ce))
}
return w
}
func (t *Table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) {
index, n, offset := splitContractIndex(ce)
scan := t.ContractTries.scanner(index, n, suffix)
buf := [norm.MaxSegmentSize]byte{}
bufp := 0
p := scan.scan(0)
if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
// By now we should have filtered most cases.
p0 := p
bufn := 0
rune := norm.NFD.Properties(suffix[p:])
p += rune.Size()
if rune.LeadCCC() != 0 {
prevCC := rune.TrailCCC()
// A gap may only occur in the last normalization segment.
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 {
scan.s = suffix[:p+end]
}
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
rune = norm.NFD.Properties(suffix[p:])
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
break
}
prevCC = rune.TrailCCC()
if pp := scan.scan(p); pp != p {
// Copy the interstitial runes for later processing.
bufn += copy(buf[bufn:], suffix[p0:p])
if scan.pindex == pp {
bufp = bufn
}
p, p0 = pp, pp
} else {
p += rune.Size()
}
}
}
}
// Append weights for the matched contraction, which may be an expansion.
i, n := scan.result()
ce = Elem(t.ContractElem[i+offset])
if ce.ctype() == ceNormal {
w = append(w, ce)
} else {
w = t.appendExpansion(w, ce)
}
// Append weights for the runes in the segment not part of the contraction.
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
w, p = t.appendNext(w, source{bytes: b})
}
return w, n
}
// TODO: unify the two implementations. This is best done after first simplifying
// the algorithm taking into account the inclusion of both NFC and NFD forms
// in the table.
func (t *Table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) {
index, n, offset := splitContractIndex(ce)
scan := t.ContractTries.scannerString(index, n, suffix)
buf := [norm.MaxSegmentSize]byte{}
bufp := 0
p := scan.scan(0)
if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
// By now we should have filtered most cases.
p0 := p
bufn := 0
rune := norm.NFD.PropertiesString(suffix[p:])
p += rune.Size()
if rune.LeadCCC() != 0 {
prevCC := rune.TrailCCC()
// A gap may only occur in the last normalization segment.
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 {
scan.s = suffix[:p+end]
}
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
rune = norm.NFD.PropertiesString(suffix[p:])
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
break
}
prevCC = rune.TrailCCC()
if pp := scan.scan(p); pp != p {
// Copy the interstitial runes for later processing.
bufn += copy(buf[bufn:], suffix[p0:p])
if scan.pindex == pp {
bufp = bufn
}
p, p0 = pp, pp
} else {
p += rune.Size()
}
}
}
}
// Append weights for the matched contraction, which may be an expansion.
i, n := scan.result()
ce = Elem(t.ContractElem[i+offset])
if ce.ctype() == ceNormal {
w = append(w, ce)
} else {
w = t.appendExpansion(w, ce)
}
// Append weights for the runes in the segment not part of the contraction.
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
w, p = t.appendNext(w, source{bytes: b})
}
return w, n
}

159
vendor/golang.org/x/text/internal/colltab/trie.go generated vendored Normal file
View file

@ -0,0 +1,159 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The trie in this file is used to associate the first full character in an
// UTF-8 string to a collation element. All but the last byte in a UTF-8 byte
// sequence are used to lookup offsets in the index table to be used for the
// next byte. The last byte is used to index into a table of collation elements.
// For a full description, see go.text/collate/build/trie.go.
package colltab
const blockSize = 64
type Trie struct {
Index0 []uint16 // index for first byte (0xC0-0xFF)
Values0 []uint32 // index for first byte (0x00-0x7F)
Index []uint16
Values []uint32
}
const (
t1 = 0x00 // 0000 0000
tx = 0x80 // 1000 0000
t2 = 0xC0 // 1100 0000
t3 = 0xE0 // 1110 0000
t4 = 0xF0 // 1111 0000
t5 = 0xF8 // 1111 1000
t6 = 0xFC // 1111 1100
te = 0xFE // 1111 1110
)
func (t *Trie) lookupValue(n uint16, b byte) Elem {
return Elem(t.Values[int(n)<<6+int(b)])
}
// lookup returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *Trie) lookup(s []byte) (v Elem, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return Elem(t.Values0[c0]), 1
case c0 < t2:
return 0, 1
case c0 < t3:
if len(s) < 2 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
return t.lookupValue(i, c1), 2
case c0 < t4:
if len(s) < 3 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
return t.lookupValue(i, c2), 3
case c0 < t5:
if len(s) < 4 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = int(i)<<6 + int(c2)
i = t.Index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
return t.lookupValue(i, c3), 4
}
// Illegal rune
return 0, 1
}
// The body of lookupString is a verbatim copy of that of lookup.
func (t *Trie) lookupString(s string) (v Elem, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return Elem(t.Values0[c0]), 1
case c0 < t2:
return 0, 1
case c0 < t3:
if len(s) < 2 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
return t.lookupValue(i, c1), 2
case c0 < t4:
if len(s) < 3 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
return t.lookupValue(i, c2), 3
case c0 < t5:
if len(s) < 4 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = int(i)<<6 + int(c2)
i = t.Index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
return t.lookupValue(i, c3), 4
}
// Illegal rune
return 0, 1
}

31
vendor/golang.org/x/text/internal/colltab/weighter.go generated vendored Normal file
View file

@ -0,0 +1,31 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab // import "golang.org/x/text/internal/colltab"
// A Weighter can be used as a source for Collator and Searcher.
type Weighter interface {
// Start finds the start of the segment that includes position p.
Start(p int, b []byte) int
// StartString finds the start of the segment that includes position p.
StartString(p int, s string) int
// AppendNext appends Elems to buf corresponding to the longest match
// of a single character or contraction from the start of s.
// It returns the new buf and the number of bytes consumed.
AppendNext(buf []Elem, s []byte) (ce []Elem, n int)
// AppendNextString appends Elems to buf corresponding to the longest match
// of a single character or contraction from the start of s.
// It returns the new buf and the number of bytes consumed.
AppendNextString(buf []Elem, s string) (ce []Elem, n int)
// Domain returns a slice of all single characters and contractions for which
// collation elements are defined in this table.
Domain() []string
// Top returns the highest variable primary value.
Top() uint32
}

375
vendor/golang.org/x/text/internal/gen/code.go generated vendored Normal file
View file

@ -0,0 +1,375 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gen
import (
"bytes"
"encoding/gob"
"fmt"
"hash"
"hash/fnv"
"io"
"log"
"os"
"reflect"
"strings"
"unicode"
"unicode/utf8"
)
// This file contains utilities for generating code.
// TODO: other write methods like:
// - slices, maps, types, etc.
// CodeWriter is a utility for writing structured code. It computes the content
// hash and size of written content. It ensures there are newlines between
// written code blocks.
type CodeWriter struct {
buf bytes.Buffer
Size int
Hash hash.Hash32 // content hash
gob *gob.Encoder
// For comments we skip the usual one-line separator if they are followed by
// a code block.
skipSep bool
}
func (w *CodeWriter) Write(p []byte) (n int, err error) {
return w.buf.Write(p)
}
// NewCodeWriter returns a new CodeWriter.
func NewCodeWriter() *CodeWriter {
h := fnv.New32()
return &CodeWriter{Hash: h, gob: gob.NewEncoder(h)}
}
// WriteGoFile appends the buffer with the total size of all created structures
// and writes it as a Go file to the given file with the given package name.
func (w *CodeWriter) WriteGoFile(filename, pkg string) {
f, err := os.Create(filename)
if err != nil {
log.Fatalf("Could not create file %s: %v", filename, err)
}
defer f.Close()
if _, err = w.WriteGo(f, pkg, ""); err != nil {
log.Fatalf("Error writing file %s: %v", filename, err)
}
}
// WriteVersionedGoFile appends the buffer with the total size of all created
// structures and writes it as a Go file to the given file with the given
// package name and build tags for the current Unicode version,
func (w *CodeWriter) WriteVersionedGoFile(filename, pkg string) {
tags := buildTags()
if tags != "" {
pattern := fileToPattern(filename)
updateBuildTags(pattern)
filename = fmt.Sprintf(pattern, UnicodeVersion())
}
f, err := os.Create(filename)
if err != nil {
log.Fatalf("Could not create file %s: %v", filename, err)
}
defer f.Close()
if _, err = w.WriteGo(f, pkg, tags); err != nil {
log.Fatalf("Error writing file %s: %v", filename, err)
}
}
// WriteGo appends the buffer with the total size of all created structures and
// writes it as a Go file to the given writer with the given package name.
func (w *CodeWriter) WriteGo(out io.Writer, pkg, tags string) (n int, err error) {
sz := w.Size
if sz > 0 {
w.WriteComment("Total table size %d bytes (%dKiB); checksum: %X\n", sz, sz/1024, w.Hash.Sum32())
}
defer w.buf.Reset()
return WriteGo(out, pkg, tags, w.buf.Bytes())
}
func (w *CodeWriter) printf(f string, x ...interface{}) {
fmt.Fprintf(w, f, x...)
}
func (w *CodeWriter) insertSep() {
if w.skipSep {
w.skipSep = false
return
}
// Use at least two newlines to ensure a blank space between the previous
// block. WriteGoFile will remove extraneous newlines.
w.printf("\n\n")
}
// WriteComment writes a comment block. All line starts are prefixed with "//".
// Initial empty lines are gobbled. The indentation for the first line is
// stripped from consecutive lines.
func (w *CodeWriter) WriteComment(comment string, args ...interface{}) {
s := fmt.Sprintf(comment, args...)
s = strings.Trim(s, "\n")
// Use at least two newlines to ensure a blank space between the previous
// block. WriteGoFile will remove extraneous newlines.
w.printf("\n\n// ")
w.skipSep = true
// strip first indent level.
sep := "\n"
for ; len(s) > 0 && (s[0] == '\t' || s[0] == ' '); s = s[1:] {
sep += s[:1]
}
strings.NewReplacer(sep, "\n// ", "\n", "\n// ").WriteString(w, s)
w.printf("\n")
}
func (w *CodeWriter) writeSizeInfo(size int) {
w.printf("// Size: %d bytes\n", size)
}
// WriteConst writes a constant of the given name and value.
func (w *CodeWriter) WriteConst(name string, x interface{}) {
w.insertSep()
v := reflect.ValueOf(x)
switch v.Type().Kind() {
case reflect.String:
w.printf("const %s %s = ", name, typeName(x))
w.WriteString(v.String())
w.printf("\n")
default:
w.printf("const %s = %#v\n", name, x)
}
}
// WriteVar writes a variable of the given name and value.
func (w *CodeWriter) WriteVar(name string, x interface{}) {
w.insertSep()
v := reflect.ValueOf(x)
oldSize := w.Size
sz := int(v.Type().Size())
w.Size += sz
switch v.Type().Kind() {
case reflect.String:
w.printf("var %s %s = ", name, typeName(x))
w.WriteString(v.String())
case reflect.Struct:
w.gob.Encode(x)
fallthrough
case reflect.Slice, reflect.Array:
w.printf("var %s = ", name)
w.writeValue(v)
w.writeSizeInfo(w.Size - oldSize)
default:
w.printf("var %s %s = ", name, typeName(x))
w.gob.Encode(x)
w.writeValue(v)
w.writeSizeInfo(w.Size - oldSize)
}
w.printf("\n")
}
func (w *CodeWriter) writeValue(v reflect.Value) {
x := v.Interface()
switch v.Kind() {
case reflect.String:
w.WriteString(v.String())
case reflect.Array:
// Don't double count: callers of WriteArray count on the size being
// added, so we need to discount it here.
w.Size -= int(v.Type().Size())
w.writeSlice(x, true)
case reflect.Slice:
w.writeSlice(x, false)
case reflect.Struct:
w.printf("%s{\n", typeName(v.Interface()))
t := v.Type()
for i := 0; i < v.NumField(); i++ {
w.printf("%s: ", t.Field(i).Name)
w.writeValue(v.Field(i))
w.printf(",\n")
}
w.printf("}")
default:
w.printf("%#v", x)
}
}
// WriteString writes a string literal.
func (w *CodeWriter) WriteString(s string) {
io.WriteString(w.Hash, s) // content hash
w.Size += len(s)
const maxInline = 40
if len(s) <= maxInline {
w.printf("%q", s)
return
}
// We will render the string as a multi-line string.
const maxWidth = 80 - 4 - len(`"`) - len(`" +`)
// When starting on its own line, go fmt indents line 2+ an extra level.
n, max := maxWidth, maxWidth-4
// As per https://golang.org/issue/18078, the compiler has trouble
// compiling the concatenation of many strings, s0 + s1 + s2 + ... + sN,
// for large N. We insert redundant, explicit parentheses to work around
// that, lowering the N at any given step: (s0 + s1 + ... + s63) + (s64 +
// ... + s127) + etc + (etc + ... + sN).
explicitParens, extraComment := len(s) > 128*1024, ""
if explicitParens {
w.printf(`(`)
extraComment = "; the redundant, explicit parens are for https://golang.org/issue/18078"
}
// Print "" +\n, if a string does not start on its own line.
b := w.buf.Bytes()
if p := len(bytes.TrimRight(b, " \t")); p > 0 && b[p-1] != '\n' {
w.printf("\"\" + // Size: %d bytes%s\n", len(s), extraComment)
n, max = maxWidth, maxWidth
}
w.printf(`"`)
for sz, p, nLines := 0, 0, 0; p < len(s); {
var r rune
r, sz = utf8.DecodeRuneInString(s[p:])
out := s[p : p+sz]
chars := 1
if !unicode.IsPrint(r) || r == utf8.RuneError || r == '"' {
switch sz {
case 1:
out = fmt.Sprintf("\\x%02x", s[p])
case 2, 3:
out = fmt.Sprintf("\\u%04x", r)
case 4:
out = fmt.Sprintf("\\U%08x", r)
}
chars = len(out)
} else if r == '\\' {
out = "\\" + string(r)
chars = 2
}
if n -= chars; n < 0 {
nLines++
if explicitParens && nLines&63 == 63 {
w.printf("\") + (\"")
}
w.printf("\" +\n\"")
n = max - len(out)
}
w.printf("%s", out)
p += sz
}
w.printf(`"`)
if explicitParens {
w.printf(`)`)
}
}
// WriteSlice writes a slice value.
func (w *CodeWriter) WriteSlice(x interface{}) {
w.writeSlice(x, false)
}
// WriteArray writes an array value.
func (w *CodeWriter) WriteArray(x interface{}) {
w.writeSlice(x, true)
}
func (w *CodeWriter) writeSlice(x interface{}, isArray bool) {
v := reflect.ValueOf(x)
w.gob.Encode(v.Len())
w.Size += v.Len() * int(v.Type().Elem().Size())
name := typeName(x)
if isArray {
name = fmt.Sprintf("[%d]%s", v.Len(), name[strings.Index(name, "]")+1:])
}
if isArray {
w.printf("%s{\n", name)
} else {
w.printf("%s{ // %d elements\n", name, v.Len())
}
switch kind := v.Type().Elem().Kind(); kind {
case reflect.String:
for _, s := range x.([]string) {
w.WriteString(s)
w.printf(",\n")
}
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
// nLine and nBlock are the number of elements per line and block.
nLine, nBlock, format := 8, 64, "%d,"
switch kind {
case reflect.Uint8:
format = "%#02x,"
case reflect.Uint16:
format = "%#04x,"
case reflect.Uint32:
nLine, nBlock, format = 4, 32, "%#08x,"
case reflect.Uint, reflect.Uint64:
nLine, nBlock, format = 4, 32, "%#016x,"
case reflect.Int8:
nLine = 16
}
n := nLine
for i := 0; i < v.Len(); i++ {
if i%nBlock == 0 && v.Len() > nBlock {
w.printf("// Entry %X - %X\n", i, i+nBlock-1)
}
x := v.Index(i).Interface()
w.gob.Encode(x)
w.printf(format, x)
if n--; n == 0 {
n = nLine
w.printf("\n")
}
}
w.printf("\n")
case reflect.Struct:
zero := reflect.Zero(v.Type().Elem()).Interface()
for i := 0; i < v.Len(); i++ {
x := v.Index(i).Interface()
w.gob.EncodeValue(v)
if !reflect.DeepEqual(zero, x) {
line := fmt.Sprintf("%#v,\n", x)
line = line[strings.IndexByte(line, '{'):]
w.printf("%d: ", i)
w.printf(line)
}
}
case reflect.Array:
for i := 0; i < v.Len(); i++ {
w.printf("%d: %#v,\n", i, v.Index(i).Interface())
}
default:
panic("gen: slice elem type not supported")
}
w.printf("}")
}
// WriteType writes a definition of the type of the given value and returns the
// type name.
func (w *CodeWriter) WriteType(x interface{}) string {
t := reflect.TypeOf(x)
w.printf("type %s struct {\n", t.Name())
for i := 0; i < t.NumField(); i++ {
w.printf("\t%s %s\n", t.Field(i).Name, t.Field(i).Type)
}
w.printf("}\n")
return t.Name()
}
// typeName returns the name of the go type of x.
func typeName(x interface{}) string {
t := reflect.ValueOf(x).Type()
return strings.Replace(fmt.Sprint(t), "main.", "", 1)
}

347
vendor/golang.org/x/text/internal/gen/gen.go generated vendored Normal file
View file

@ -0,0 +1,347 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package gen contains common code for the various code generation tools in the
// text repository. Its usage ensures consistency between tools.
//
// This package defines command line flags that are common to most generation
// tools. The flags allow for specifying specific Unicode and CLDR versions
// in the public Unicode data repository (https://www.unicode.org/Public).
//
// A local Unicode data mirror can be set through the flag -local or the
// environment variable UNICODE_DIR. The former takes precedence. The local
// directory should follow the same structure as the public repository.
//
// IANA data can also optionally be mirrored by putting it in the iana directory
// rooted at the top of the local mirror. Beware, though, that IANA data is not
// versioned. So it is up to the developer to use the right version.
package gen // import "golang.org/x/text/internal/gen"
import (
"bytes"
"flag"
"fmt"
"go/build"
"go/format"
"io"
"io/ioutil"
"log"
"net/http"
"os"
"path"
"path/filepath"
"regexp"
"strings"
"sync"
"unicode"
"golang.org/x/text/unicode/cldr"
)
var (
url = flag.String("url",
"https://www.unicode.org/Public",
"URL of Unicode database directory")
iana = flag.String("iana",
"http://www.iana.org",
"URL of the IANA repository")
unicodeVersion = flag.String("unicode",
getEnv("UNICODE_VERSION", unicode.Version),
"unicode version to use")
cldrVersion = flag.String("cldr",
getEnv("CLDR_VERSION", cldr.Version),
"cldr version to use")
)
func getEnv(name, def string) string {
if v := os.Getenv(name); v != "" {
return v
}
return def
}
// Init performs common initialization for a gen command. It parses the flags
// and sets up the standard logging parameters.
func Init() {
log.SetPrefix("")
log.SetFlags(log.Lshortfile)
flag.Parse()
}
const header = `// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
`
// UnicodeVersion reports the requested Unicode version.
func UnicodeVersion() string {
return *unicodeVersion
}
// CLDRVersion reports the requested CLDR version.
func CLDRVersion() string {
return *cldrVersion
}
var tags = []struct{ version, buildTags string }{
{"9.0.0", "!go1.10"},
{"10.0.0", "go1.10,!go1.13"},
{"11.0.0", "go1.13"},
}
// buildTags reports the build tags used for the current Unicode version.
func buildTags() string {
v := UnicodeVersion()
for _, e := range tags {
if e.version == v {
return e.buildTags
}
}
log.Fatalf("Unknown build tags for Unicode version %q.", v)
return ""
}
// IsLocal reports whether data files are available locally.
func IsLocal() bool {
dir, err := localReadmeFile()
if err != nil {
return false
}
if _, err = os.Stat(dir); err != nil {
return false
}
return true
}
// OpenUCDFile opens the requested UCD file. The file is specified relative to
// the public Unicode root directory. It will call log.Fatal if there are any
// errors.
func OpenUCDFile(file string) io.ReadCloser {
return openUnicode(path.Join(*unicodeVersion, "ucd", file))
}
// OpenCLDRCoreZip opens the CLDR core zip file. It will call log.Fatal if there
// are any errors.
func OpenCLDRCoreZip() io.ReadCloser {
return OpenUnicodeFile("cldr", *cldrVersion, "core.zip")
}
// OpenUnicodeFile opens the requested file of the requested category from the
// root of the Unicode data archive. The file is specified relative to the
// public Unicode root directory. If version is "", it will use the default
// Unicode version. It will call log.Fatal if there are any errors.
func OpenUnicodeFile(category, version, file string) io.ReadCloser {
if version == "" {
version = UnicodeVersion()
}
return openUnicode(path.Join(category, version, file))
}
// OpenIANAFile opens the requested IANA file. The file is specified relative
// to the IANA root, which is typically either http://www.iana.org or the
// iana directory in the local mirror. It will call log.Fatal if there are any
// errors.
func OpenIANAFile(path string) io.ReadCloser {
return Open(*iana, "iana", path)
}
var (
dirMutex sync.Mutex
localDir string
)
const permissions = 0755
func localReadmeFile() (string, error) {
p, err := build.Import("golang.org/x/text", "", build.FindOnly)
if err != nil {
return "", fmt.Errorf("Could not locate package: %v", err)
}
return filepath.Join(p.Dir, "DATA", "README"), nil
}
func getLocalDir() string {
dirMutex.Lock()
defer dirMutex.Unlock()
readme, err := localReadmeFile()
if err != nil {
log.Fatal(err)
}
dir := filepath.Dir(readme)
if _, err := os.Stat(readme); err != nil {
if err := os.MkdirAll(dir, permissions); err != nil {
log.Fatalf("Could not create directory: %v", err)
}
ioutil.WriteFile(readme, []byte(readmeTxt), permissions)
}
return dir
}
const readmeTxt = `Generated by golang.org/x/text/internal/gen. DO NOT EDIT.
This directory contains downloaded files used to generate the various tables
in the golang.org/x/text subrepo.
Note that the language subtag repo (iana/assignments/language-subtag-registry)
and all other times in the iana subdirectory are not versioned and will need
to be periodically manually updated. The easiest way to do this is to remove
the entire iana directory. This is mostly of concern when updating the language
package.
`
// Open opens subdir/path if a local directory is specified and the file exists,
// where subdir is a directory relative to the local root, or fetches it from
// urlRoot/path otherwise. It will call log.Fatal if there are any errors.
func Open(urlRoot, subdir, path string) io.ReadCloser {
file := filepath.Join(getLocalDir(), subdir, filepath.FromSlash(path))
return open(file, urlRoot, path)
}
func openUnicode(path string) io.ReadCloser {
file := filepath.Join(getLocalDir(), filepath.FromSlash(path))
return open(file, *url, path)
}
// TODO: automatically periodically update non-versioned files.
func open(file, urlRoot, path string) io.ReadCloser {
if f, err := os.Open(file); err == nil {
return f
}
r := get(urlRoot, path)
defer r.Close()
b, err := ioutil.ReadAll(r)
if err != nil {
log.Fatalf("Could not download file: %v", err)
}
os.MkdirAll(filepath.Dir(file), permissions)
if err := ioutil.WriteFile(file, b, permissions); err != nil {
log.Fatalf("Could not create file: %v", err)
}
return ioutil.NopCloser(bytes.NewReader(b))
}
func get(root, path string) io.ReadCloser {
url := root + "/" + path
fmt.Printf("Fetching %s...", url)
defer fmt.Println(" done.")
resp, err := http.Get(url)
if err != nil {
log.Fatalf("HTTP GET: %v", err)
}
if resp.StatusCode != 200 {
log.Fatalf("Bad GET status for %q: %q", url, resp.Status)
}
return resp.Body
}
// TODO: use Write*Version in all applicable packages.
// WriteUnicodeVersion writes a constant for the Unicode version from which the
// tables are generated.
func WriteUnicodeVersion(w io.Writer) {
fmt.Fprintf(w, "// UnicodeVersion is the Unicode version from which the tables in this package are derived.\n")
fmt.Fprintf(w, "const UnicodeVersion = %q\n\n", UnicodeVersion())
}
// WriteCLDRVersion writes a constant for the CLDR version from which the
// tables are generated.
func WriteCLDRVersion(w io.Writer) {
fmt.Fprintf(w, "// CLDRVersion is the CLDR version from which the tables in this package are derived.\n")
fmt.Fprintf(w, "const CLDRVersion = %q\n\n", CLDRVersion())
}
// WriteGoFile prepends a standard file comment and package statement to the
// given bytes, applies gofmt, and writes them to a file with the given name.
// It will call log.Fatal if there are any errors.
func WriteGoFile(filename, pkg string, b []byte) {
w, err := os.Create(filename)
if err != nil {
log.Fatalf("Could not create file %s: %v", filename, err)
}
defer w.Close()
if _, err = WriteGo(w, pkg, "", b); err != nil {
log.Fatalf("Error writing file %s: %v", filename, err)
}
}
func fileToPattern(filename string) string {
suffix := ".go"
if strings.HasSuffix(filename, "_test.go") {
suffix = "_test.go"
}
prefix := filename[:len(filename)-len(suffix)]
return fmt.Sprint(prefix, "%s", suffix)
}
func updateBuildTags(pattern string) {
for _, t := range tags {
oldFile := fmt.Sprintf(pattern, t.version)
b, err := ioutil.ReadFile(oldFile)
if err != nil {
continue
}
build := fmt.Sprintf("// +build %s", t.buildTags)
b = regexp.MustCompile(`// \+build .*`).ReplaceAll(b, []byte(build))
err = ioutil.WriteFile(oldFile, b, 0644)
if err != nil {
log.Fatal(err)
}
}
}
// WriteVersionedGoFile prepends a standard file comment, adds build tags to
// version the file for the current Unicode version, and package statement to
// the given bytes, applies gofmt, and writes them to a file with the given
// name. It will call log.Fatal if there are any errors.
func WriteVersionedGoFile(filename, pkg string, b []byte) {
pattern := fileToPattern(filename)
updateBuildTags(pattern)
filename = fmt.Sprintf(pattern, UnicodeVersion())
w, err := os.Create(filename)
if err != nil {
log.Fatalf("Could not create file %s: %v", filename, err)
}
defer w.Close()
if _, err = WriteGo(w, pkg, buildTags(), b); err != nil {
log.Fatalf("Error writing file %s: %v", filename, err)
}
}
// WriteGo prepends a standard file comment and package statement to the given
// bytes, applies gofmt, and writes them to w.
func WriteGo(w io.Writer, pkg, tags string, b []byte) (n int, err error) {
src := []byte(header)
if tags != "" {
src = append(src, fmt.Sprintf("// +build %s\n\n", tags)...)
}
src = append(src, fmt.Sprintf("package %s\n\n", pkg)...)
src = append(src, b...)
formatted, err := format.Source(src)
if err != nil {
// Print the generated code even in case of an error so that the
// returned error can be meaningfully interpreted.
n, _ = w.Write(src)
return n, err
}
return w.Write(formatted)
}
// Repackage rewrites a Go file from belonging to package main to belonging to
// the given package.
func Repackage(inFile, outFile, pkg string) {
src, err := ioutil.ReadFile(inFile)
if err != nil {
log.Fatalf("reading %s: %v", inFile, err)
}
const toDelete = "package main\n\n"
i := bytes.Index(src, []byte(toDelete))
if i < 0 {
log.Fatalf("Could not find %q in %s.", toDelete, inFile)
}
w := &bytes.Buffer{}
w.Write(src[i+len(toDelete):])
WriteGoFile(outFile, pkg, w.Bytes())
}

16
vendor/golang.org/x/text/internal/language/common.go generated vendored Normal file
View file

@ -0,0 +1,16 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package language
// This file contains code common to the maketables.go and the package code.
// AliasType is the type of an alias in AliasMap.
type AliasType int8
const (
Deprecated AliasType = iota
Macro
Legacy
AliasTypeUnknown AliasType = -1
)

29
vendor/golang.org/x/text/internal/language/compact.go generated vendored Normal file
View file

@ -0,0 +1,29 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
// CompactCoreInfo is a compact integer with the three core tags encoded.
type CompactCoreInfo uint32
// GetCompactCore generates a uint32 value that is guaranteed to be unique for
// different language, region, and script values.
func GetCompactCore(t Tag) (cci CompactCoreInfo, ok bool) {
if t.LangID > langNoIndexOffset {
return 0, false
}
cci |= CompactCoreInfo(t.LangID) << (8 + 12)
cci |= CompactCoreInfo(t.ScriptID) << 12
cci |= CompactCoreInfo(t.RegionID)
return cci, true
}
// Tag generates a tag from c.
func (c CompactCoreInfo) Tag() Tag {
return Tag{
LangID: Language(c >> 20),
RegionID: Region(c & 0x3ff),
ScriptID: Script(c>>12) & 0xff,
}
}

View file

@ -0,0 +1,61 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package compact defines a compact representation of language tags.
//
// Common language tags (at least all for which locale information is defined
// in CLDR) are assigned a unique index. Each Tag is associated with such an
// ID for selecting language-related resources (such as translations) as well
// as one for selecting regional defaults (currency, number formatting, etc.)
//
// It may want to export this functionality at some point, but at this point
// this is only available for use within x/text.
package compact // import "golang.org/x/text/internal/language/compact"
import (
"sort"
"strings"
"golang.org/x/text/internal/language"
)
// ID is an integer identifying a single tag.
type ID uint16
func getCoreIndex(t language.Tag) (id ID, ok bool) {
cci, ok := language.GetCompactCore(t)
if !ok {
return 0, false
}
i := sort.Search(len(coreTags), func(i int) bool {
return cci <= coreTags[i]
})
if i == len(coreTags) || coreTags[i] != cci {
return 0, false
}
return ID(i), true
}
// Parent returns the ID of the parent or the root ID if id is already the root.
func (id ID) Parent() ID {
return parents[id]
}
// Tag converts id to an internal language Tag.
func (id ID) Tag() language.Tag {
if int(id) >= len(coreTags) {
return specialTags[int(id)-len(coreTags)]
}
return coreTags[id].Tag()
}
var specialTags []language.Tag
func init() {
tags := strings.Split(specialTagsStr, " ")
specialTags = make([]language.Tag, len(tags))
for i, t := range tags {
specialTags[i] = language.MustParse(t)
}
}

View file

@ -0,0 +1,64 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Language tag table generator.
// Data read from the web.
package main
import (
"flag"
"fmt"
"log"
"golang.org/x/text/internal/gen"
"golang.org/x/text/unicode/cldr"
)
var (
test = flag.Bool("test",
false,
"test existing tables; can be used to compare web data with package data.")
outputFile = flag.String("output",
"tables.go",
"output file for generated tables")
)
func main() {
gen.Init()
w := gen.NewCodeWriter()
defer w.WriteGoFile("tables.go", "compact")
fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`)
b := newBuilder(w)
gen.WriteCLDRVersion(w)
b.writeCompactIndex()
}
type builder struct {
w *gen.CodeWriter
data *cldr.CLDR
supp *cldr.SupplementalData
}
func newBuilder(w *gen.CodeWriter) *builder {
r := gen.OpenCLDRCoreZip()
defer r.Close()
d := &cldr.Decoder{}
data, err := d.DecodeZip(r)
if err != nil {
log.Fatal(err)
}
b := builder{
w: w,
data: data,
supp: data.Supplemental(),
}
return &b
}

View file

@ -0,0 +1,113 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This file generates derivative tables based on the language package itself.
import (
"fmt"
"log"
"sort"
"strings"
"golang.org/x/text/internal/language"
)
// Compact indices:
// Note -va-X variants only apply to localization variants.
// BCP variants only ever apply to language.
// The only ambiguity between tags is with regions.
func (b *builder) writeCompactIndex() {
// Collect all language tags for which we have any data in CLDR.
m := map[language.Tag]bool{}
for _, lang := range b.data.Locales() {
// We include all locales unconditionally to be consistent with en_US.
// We want en_US, even though it has no data associated with it.
// TODO: put any of the languages for which no data exists at the end
// of the index. This allows all components based on ICU to use that
// as the cutoff point.
// if x := data.RawLDML(lang); false ||
// x.LocaleDisplayNames != nil ||
// x.Characters != nil ||
// x.Delimiters != nil ||
// x.Measurement != nil ||
// x.Dates != nil ||
// x.Numbers != nil ||
// x.Units != nil ||
// x.ListPatterns != nil ||
// x.Collations != nil ||
// x.Segmentations != nil ||
// x.Rbnf != nil ||
// x.Annotations != nil ||
// x.Metadata != nil {
// TODO: support POSIX natively, albeit non-standard.
tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
m[tag] = true
// }
}
// TODO: plural rules are also defined for the deprecated tags:
// iw mo sh tl
// Consider removing these as compact tags.
// Include locales for plural rules, which uses a different structure.
for _, plurals := range b.supp.Plurals {
for _, rules := range plurals.PluralRules {
for _, lang := range strings.Split(rules.Locales, " ") {
m[language.Make(lang)] = true
}
}
}
var coreTags []language.CompactCoreInfo
var special []string
for t := range m {
if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
log.Fatalf("Unexpected extension %v in %v", x, t)
}
if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
cci, ok := language.GetCompactCore(t)
if !ok {
log.Fatalf("Locale for non-basic language %q", t)
}
coreTags = append(coreTags, cci)
} else {
special = append(special, t.String())
}
}
w := b.w
sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] })
sort.Strings(special)
w.WriteComment(`
NumCompactTags is the number of common tags. The maximum tag is
NumCompactTags-1.`)
w.WriteConst("NumCompactTags", len(m))
fmt.Fprintln(w, "const (")
for i, t := range coreTags {
fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i)
}
for i, t := range special {
fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags))
}
fmt.Fprintln(w, ")")
w.WriteVar("coreTags", coreTags)
w.WriteConst("specialTagsStr", strings.Join(special, " "))
}
func ident(s string) string {
return strings.Replace(s, "-", "", -1) + "Index"
}

View file

@ -0,0 +1,54 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"log"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/language"
"golang.org/x/text/internal/language/compact"
"golang.org/x/text/unicode/cldr"
)
func main() {
r := gen.OpenCLDRCoreZip()
defer r.Close()
d := &cldr.Decoder{}
data, err := d.DecodeZip(r)
if err != nil {
log.Fatalf("DecodeZip: %v", err)
}
w := gen.NewCodeWriter()
defer w.WriteGoFile("parents.go", "compact")
// Create parents table.
type ID uint16
parents := make([]ID, compact.NumCompactTags)
for _, loc := range data.Locales() {
tag := language.MustParse(loc)
index, ok := compact.FromTag(tag)
if !ok {
continue
}
parentIndex := compact.ID(0) // und
for p := tag.Parent(); p != language.Und; p = p.Parent() {
if x, ok := compact.FromTag(p); ok {
parentIndex = x
break
}
}
parents[index] = ID(parentIndex)
}
w.WriteComment(`
parents maps a compact index of a tag to the compact index of the parent of
this tag.`)
w.WriteVar("parents", parents)
}

View file

@ -0,0 +1,260 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_index.go -output tables.go
//go:generate go run gen_parents.go
package compact
// TODO: Remove above NOTE after:
// - verifying that tables are dropped correctly (most notably matcher tables).
import (
"strings"
"golang.org/x/text/internal/language"
)
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
type Tag struct {
// NOTE: exported tags will become part of the public API.
language ID
locale ID
full fullTag // always a language.Tag for now.
}
const _und = 0
type fullTag interface {
IsRoot() bool
Parent() language.Tag
}
// Make a compact Tag from a fully specified internal language Tag.
func Make(t language.Tag) (tag Tag) {
if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
if r, err := language.ParseRegion(region[:2]); err == nil {
tFull := t
t, _ = t.SetTypeForKey("rg", "")
// TODO: should we not consider "va" for the language tag?
var exact1, exact2 bool
tag.language, exact1 = FromTag(t)
t.RegionID = r
tag.locale, exact2 = FromTag(t)
if !exact1 || !exact2 {
tag.full = tFull
}
return tag
}
}
lang, ok := FromTag(t)
tag.language = lang
tag.locale = lang
if !ok {
tag.full = t
}
return tag
}
// Tag returns an internal language Tag version of this tag.
func (t Tag) Tag() language.Tag {
if t.full != nil {
return t.full.(language.Tag)
}
tag := t.language.Tag()
if t.language != t.locale {
loc := t.locale.Tag()
tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
}
return tag
}
// IsCompact reports whether this tag is fully defined in terms of ID.
func (t *Tag) IsCompact() bool {
return t.full == nil
}
// MayHaveVariants reports whether a tag may have variants. If it returns false
// it is guaranteed the tag does not have variants.
func (t Tag) MayHaveVariants() bool {
return t.full != nil || int(t.language) >= len(coreTags)
}
// MayHaveExtensions reports whether a tag may have extensions. If it returns
// false it is guaranteed the tag does not have them.
func (t Tag) MayHaveExtensions() bool {
return t.full != nil ||
int(t.language) >= len(coreTags) ||
t.language != t.locale
}
// IsRoot returns true if t is equal to language "und".
func (t Tag) IsRoot() bool {
if t.full != nil {
return t.full.IsRoot()
}
return t.language == _und
}
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
func (t Tag) Parent() Tag {
if t.full != nil {
return Make(t.full.Parent())
}
if t.language != t.locale {
// Simulate stripping -u-rg-xxxxxx
return Tag{language: t.language, locale: t.language}
}
// TODO: use parent lookup table once cycle from internal package is
// removed. Probably by internalizing the table and declaring this fast
// enough.
// lang := compactID(internal.Parent(uint16(t.language)))
lang, _ := FromTag(t.language.Tag().Parent())
return Tag{language: lang, locale: lang}
}
// returns token t and the rest of the string.
func nextToken(s string) (t, tail string) {
p := strings.Index(s[1:], "-")
if p == -1 {
return s[1:], ""
}
p++
return s[1:p], s[p:]
}
// LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
// for which data exists in the text repository.The index will change over time
// and should not be stored in persistent storage. If t does not match a compact
// index, exact will be false and the compact index will be returned for the
// first match after repeatedly taking the Parent of t.
func LanguageID(t Tag) (id ID, exact bool) {
return t.language, t.full == nil
}
// RegionalID returns the ID for the regional variant of this tag. This index is
// used to indicate region-specific overrides, such as default currency, default
// calendar and week data, default time cycle, and default measurement system
// and unit preferences.
//
// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
// settings for currency, number formatting, etc. The CompactIndex for this tag
// will be that for en-GB, while the RegionalID will be the one corresponding to
// en-US.
func RegionalID(t Tag) (id ID, exact bool) {
return t.locale, t.full == nil
}
// LanguageTag returns t stripped of regional variant indicators.
//
// At the moment this means it is stripped of a regional and variant subtag "rg"
// and "va" in the "u" extension.
func (t Tag) LanguageTag() Tag {
if t.full == nil {
return Tag{language: t.language, locale: t.language}
}
tt := t.Tag()
tt.SetTypeForKey("rg", "")
tt.SetTypeForKey("va", "")
return Make(tt)
}
// RegionalTag returns the regional variant of the tag.
//
// At the moment this means that the region is set from the regional subtag
// "rg" in the "u" extension.
func (t Tag) RegionalTag() Tag {
rt := Tag{language: t.locale, locale: t.locale}
if t.full == nil {
return rt
}
b := language.Builder{}
tag := t.Tag()
// tag, _ = tag.SetTypeForKey("rg", "")
b.SetTag(t.locale.Tag())
if v := tag.Variants(); v != "" {
for _, v := range strings.Split(v, "-") {
b.AddVariant(v)
}
}
for _, e := range tag.Extensions() {
b.AddExt(e)
}
return t
}
// FromTag reports closest matching ID for an internal language Tag.
func FromTag(t language.Tag) (id ID, exact bool) {
// TODO: perhaps give more frequent tags a lower index.
// TODO: we could make the indexes stable. This will excluded some
// possibilities for optimization, so don't do this quite yet.
exact = true
b, s, r := t.Raw()
if t.HasString() {
if t.IsPrivateUse() {
// We have no entries for user-defined tags.
return 0, false
}
hasExtra := false
if t.HasVariants() {
if t.HasExtensions() {
build := language.Builder{}
build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
build.AddVariant(t.Variants())
exact = false
t = build.Make()
}
hasExtra = true
} else if _, ok := t.Extension('u'); ok {
// TODO: va may mean something else. Consider not considering it.
// Strip all but the 'va' entry.
old := t
variant := t.TypeForKey("va")
t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
if variant != "" {
t, _ = t.SetTypeForKey("va", variant)
hasExtra = true
}
exact = old == t
} else {
exact = false
}
if hasExtra {
// We have some variants.
for i, s := range specialTags {
if s == t {
return ID(i + len(coreTags)), exact
}
}
exact = false
}
}
if x, ok := getCoreIndex(t); ok {
return x, exact
}
exact = false
if r != 0 && s == 0 {
// Deal with cases where an extra script is inserted for the region.
t, _ := t.Maximize()
if x, ok := getCoreIndex(t); ok {
return x, exact
}
}
for t = t.Parent(); t != root; t = t.Parent() {
// No variants specified: just compare core components.
// The key has the form lllssrrr, where l, s, and r are nibbles for
// respectively the langID, scriptID, and regionID.
if x, ok := getCoreIndex(t); ok {
return x, exact
}
}
return 0, exact
}
var root = language.Tag{}

View file

@ -0,0 +1,120 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package compact
// parents maps a compact index of a tag to the compact index of the parent of
// this tag.
var parents = []ID{ // 775 elements
// Entry 0 - 3F
0x0000, 0x0000, 0x0001, 0x0001, 0x0000, 0x0004, 0x0000, 0x0006,
0x0000, 0x0008, 0x0000, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x0000,
0x0000, 0x0028, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x0000,
0x002f, 0x002e, 0x002e, 0x0000, 0x0033, 0x0000, 0x0035, 0x0000,
0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x0000, 0x003e,
// Entry 40 - 7F
0x0000, 0x0040, 0x0040, 0x0000, 0x0043, 0x0043, 0x0000, 0x0046,
0x0000, 0x0048, 0x0000, 0x0000, 0x004b, 0x004a, 0x004a, 0x0000,
0x004f, 0x004f, 0x004f, 0x004f, 0x0000, 0x0054, 0x0054, 0x0000,
0x0057, 0x0000, 0x0059, 0x0000, 0x005b, 0x0000, 0x005d, 0x005d,
0x0000, 0x0060, 0x0000, 0x0062, 0x0000, 0x0064, 0x0000, 0x0066,
0x0066, 0x0000, 0x0069, 0x0000, 0x006b, 0x006b, 0x006b, 0x006b,
0x006b, 0x006b, 0x006b, 0x0000, 0x0073, 0x0000, 0x0075, 0x0000,
0x0077, 0x0000, 0x0000, 0x007a, 0x0000, 0x007c, 0x0000, 0x007e,
// Entry 80 - BF
0x0000, 0x0080, 0x0080, 0x0000, 0x0083, 0x0083, 0x0000, 0x0086,
0x0087, 0x0087, 0x0087, 0x0086, 0x0088, 0x0087, 0x0087, 0x0087,
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088,
0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087, 0x0088, 0x0087,
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0086,
// Entry C0 - FF
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087,
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0086, 0x0087,
0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0000,
0x00ef, 0x0000, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2,
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f1, 0x00f1,
// Entry 100 - 13F
0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1,
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x0000, 0x010e,
0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0114, 0x0000,
0x0117, 0x0117, 0x0117, 0x0117, 0x0000, 0x011c, 0x0000, 0x011e,
0x0000, 0x0120, 0x0120, 0x0000, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
// Entry 140 - 17F
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x015c, 0x015c,
0x0000, 0x0160, 0x0000, 0x0000, 0x0163, 0x0000, 0x0165, 0x0000,
0x0167, 0x0167, 0x0167, 0x0000, 0x016b, 0x0000, 0x016d, 0x0000,
0x016f, 0x0000, 0x0171, 0x0171, 0x0000, 0x0174, 0x0000, 0x0176,
0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
// Entry 180 - 1BF
0x0000, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0184, 0x0184,
0x0184, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x018e,
0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x0000, 0x0195, 0x0000,
0x0197, 0x0000, 0x0000, 0x019a, 0x0000, 0x0000, 0x019d, 0x0000,
0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x0000, 0x01ad, 0x0000,
0x01af, 0x0000, 0x01b1, 0x01b1, 0x0000, 0x01b4, 0x0000, 0x01b6,
0x0000, 0x01b8, 0x0000, 0x01ba, 0x0000, 0x01bc, 0x0000, 0x0000,
// Entry 1C0 - 1FF
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x0000,
0x01c7, 0x0000, 0x01c9, 0x0000, 0x01cb, 0x01cb, 0x01cb, 0x01cb,
0x0000, 0x01d0, 0x0000, 0x01d2, 0x01d2, 0x0000, 0x01d5, 0x0000,
0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x0000, 0x01dd, 0x0000,
0x01df, 0x01df, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
0x0000, 0x01f0, 0x0000, 0x0000, 0x01f3, 0x0000, 0x01f5, 0x01f5,
0x01f5, 0x0000, 0x01f9, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x0000,
// Entry 200 - 23F
0x01ff, 0x0000, 0x0000, 0x0202, 0x0000, 0x0204, 0x0204, 0x0000,
0x0207, 0x0000, 0x0209, 0x0209, 0x0000, 0x020c, 0x020c, 0x0000,
0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x0000,
0x0217, 0x0000, 0x0219, 0x0000, 0x021b, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0221, 0x0000, 0x0000, 0x0224, 0x0000, 0x0226,
0x0226, 0x0000, 0x0229, 0x0000, 0x022b, 0x022b, 0x0000, 0x0000,
0x022f, 0x022e, 0x022e, 0x0000, 0x0000, 0x0234, 0x0000, 0x0236,
0x0000, 0x0238, 0x0000, 0x0244, 0x023a, 0x0244, 0x0244, 0x0244,
// Entry 240 - 27F
0x0244, 0x0244, 0x0244, 0x0244, 0x023a, 0x0244, 0x0244, 0x0000,
0x0247, 0x0247, 0x0247, 0x0000, 0x024b, 0x0000, 0x024d, 0x0000,
0x024f, 0x024f, 0x0000, 0x0252, 0x0000, 0x0254, 0x0254, 0x0254,
0x0254, 0x0254, 0x0254, 0x0000, 0x025b, 0x0000, 0x025d, 0x0000,
0x025f, 0x0000, 0x0261, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
0x0000, 0x0268, 0x0268, 0x0268, 0x0000, 0x026c, 0x0000, 0x026e,
0x0000, 0x0270, 0x0000, 0x0000, 0x0000, 0x0274, 0x0273, 0x0273,
0x0000, 0x0278, 0x0000, 0x027a, 0x0000, 0x027c, 0x0000, 0x0000,
// Entry 280 - 2BF
0x0000, 0x0000, 0x0281, 0x0000, 0x0000, 0x0284, 0x0000, 0x0286,
0x0286, 0x0286, 0x0286, 0x0000, 0x028b, 0x028b, 0x028b, 0x0000,
0x028f, 0x028f, 0x028f, 0x028f, 0x028f, 0x0000, 0x0295, 0x0295,
0x0295, 0x0295, 0x0000, 0x0000, 0x0000, 0x0000, 0x029d, 0x029d,
0x029d, 0x0000, 0x02a1, 0x02a1, 0x02a1, 0x02a1, 0x0000, 0x0000,
0x02a7, 0x02a7, 0x02a7, 0x02a7, 0x0000, 0x02ac, 0x0000, 0x02ae,
0x02ae, 0x0000, 0x02b1, 0x0000, 0x02b3, 0x0000, 0x02b5, 0x02b5,
0x0000, 0x0000, 0x02b9, 0x0000, 0x0000, 0x0000, 0x02bd, 0x0000,
// Entry 2C0 - 2FF
0x02bf, 0x02bf, 0x0000, 0x0000, 0x02c3, 0x0000, 0x02c5, 0x0000,
0x02c7, 0x0000, 0x02c9, 0x0000, 0x02cb, 0x0000, 0x02cd, 0x02cd,
0x0000, 0x0000, 0x02d1, 0x0000, 0x02d3, 0x02d0, 0x02d0, 0x0000,
0x0000, 0x02d8, 0x02d7, 0x02d7, 0x0000, 0x0000, 0x02dd, 0x0000,
0x02df, 0x0000, 0x02e1, 0x0000, 0x0000, 0x02e4, 0x0000, 0x02e6,
0x0000, 0x0000, 0x02e9, 0x0000, 0x02eb, 0x0000, 0x02ed, 0x0000,
0x02ef, 0x02ef, 0x0000, 0x0000, 0x02f3, 0x02f2, 0x02f2, 0x0000,
0x02f7, 0x0000, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x0000,
// Entry 300 - 33F
0x02ff, 0x0300, 0x02ff, 0x0000, 0x0303, 0x0051, 0x00e6,
} // Size: 1574 bytes
// Total table size 1574 bytes (1KiB); checksum: 895AAF0B

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,91 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package compact
var (
und = Tag{}
Und Tag = Tag{}
Afrikaans Tag = Tag{language: afIndex, locale: afIndex}
Amharic Tag = Tag{language: amIndex, locale: amIndex}
Arabic Tag = Tag{language: arIndex, locale: arIndex}
ModernStandardArabic Tag = Tag{language: ar001Index, locale: ar001Index}
Azerbaijani Tag = Tag{language: azIndex, locale: azIndex}
Bulgarian Tag = Tag{language: bgIndex, locale: bgIndex}
Bengali Tag = Tag{language: bnIndex, locale: bnIndex}
Catalan Tag = Tag{language: caIndex, locale: caIndex}
Czech Tag = Tag{language: csIndex, locale: csIndex}
Danish Tag = Tag{language: daIndex, locale: daIndex}
German Tag = Tag{language: deIndex, locale: deIndex}
Greek Tag = Tag{language: elIndex, locale: elIndex}
English Tag = Tag{language: enIndex, locale: enIndex}
AmericanEnglish Tag = Tag{language: enUSIndex, locale: enUSIndex}
BritishEnglish Tag = Tag{language: enGBIndex, locale: enGBIndex}
Spanish Tag = Tag{language: esIndex, locale: esIndex}
EuropeanSpanish Tag = Tag{language: esESIndex, locale: esESIndex}
LatinAmericanSpanish Tag = Tag{language: es419Index, locale: es419Index}
Estonian Tag = Tag{language: etIndex, locale: etIndex}
Persian Tag = Tag{language: faIndex, locale: faIndex}
Finnish Tag = Tag{language: fiIndex, locale: fiIndex}
Filipino Tag = Tag{language: filIndex, locale: filIndex}
French Tag = Tag{language: frIndex, locale: frIndex}
CanadianFrench Tag = Tag{language: frCAIndex, locale: frCAIndex}
Gujarati Tag = Tag{language: guIndex, locale: guIndex}
Hebrew Tag = Tag{language: heIndex, locale: heIndex}
Hindi Tag = Tag{language: hiIndex, locale: hiIndex}
Croatian Tag = Tag{language: hrIndex, locale: hrIndex}
Hungarian Tag = Tag{language: huIndex, locale: huIndex}
Armenian Tag = Tag{language: hyIndex, locale: hyIndex}
Indonesian Tag = Tag{language: idIndex, locale: idIndex}
Icelandic Tag = Tag{language: isIndex, locale: isIndex}
Italian Tag = Tag{language: itIndex, locale: itIndex}
Japanese Tag = Tag{language: jaIndex, locale: jaIndex}
Georgian Tag = Tag{language: kaIndex, locale: kaIndex}
Kazakh Tag = Tag{language: kkIndex, locale: kkIndex}
Khmer Tag = Tag{language: kmIndex, locale: kmIndex}
Kannada Tag = Tag{language: knIndex, locale: knIndex}
Korean Tag = Tag{language: koIndex, locale: koIndex}
Kirghiz Tag = Tag{language: kyIndex, locale: kyIndex}
Lao Tag = Tag{language: loIndex, locale: loIndex}
Lithuanian Tag = Tag{language: ltIndex, locale: ltIndex}
Latvian Tag = Tag{language: lvIndex, locale: lvIndex}
Macedonian Tag = Tag{language: mkIndex, locale: mkIndex}
Malayalam Tag = Tag{language: mlIndex, locale: mlIndex}
Mongolian Tag = Tag{language: mnIndex, locale: mnIndex}
Marathi Tag = Tag{language: mrIndex, locale: mrIndex}
Malay Tag = Tag{language: msIndex, locale: msIndex}
Burmese Tag = Tag{language: myIndex, locale: myIndex}
Nepali Tag = Tag{language: neIndex, locale: neIndex}
Dutch Tag = Tag{language: nlIndex, locale: nlIndex}
Norwegian Tag = Tag{language: noIndex, locale: noIndex}
Punjabi Tag = Tag{language: paIndex, locale: paIndex}
Polish Tag = Tag{language: plIndex, locale: plIndex}
Portuguese Tag = Tag{language: ptIndex, locale: ptIndex}
BrazilianPortuguese Tag = Tag{language: ptBRIndex, locale: ptBRIndex}
EuropeanPortuguese Tag = Tag{language: ptPTIndex, locale: ptPTIndex}
Romanian Tag = Tag{language: roIndex, locale: roIndex}
Russian Tag = Tag{language: ruIndex, locale: ruIndex}
Sinhala Tag = Tag{language: siIndex, locale: siIndex}
Slovak Tag = Tag{language: skIndex, locale: skIndex}
Slovenian Tag = Tag{language: slIndex, locale: slIndex}
Albanian Tag = Tag{language: sqIndex, locale: sqIndex}
Serbian Tag = Tag{language: srIndex, locale: srIndex}
SerbianLatin Tag = Tag{language: srLatnIndex, locale: srLatnIndex}
Swedish Tag = Tag{language: svIndex, locale: svIndex}
Swahili Tag = Tag{language: swIndex, locale: swIndex}
Tamil Tag = Tag{language: taIndex, locale: taIndex}
Telugu Tag = Tag{language: teIndex, locale: teIndex}
Thai Tag = Tag{language: thIndex, locale: thIndex}
Turkish Tag = Tag{language: trIndex, locale: trIndex}
Ukrainian Tag = Tag{language: ukIndex, locale: ukIndex}
Urdu Tag = Tag{language: urIndex, locale: urIndex}
Uzbek Tag = Tag{language: uzIndex, locale: uzIndex}
Vietnamese Tag = Tag{language: viIndex, locale: viIndex}
Chinese Tag = Tag{language: zhIndex, locale: zhIndex}
SimplifiedChinese Tag = Tag{language: zhHansIndex, locale: zhHansIndex}
TraditionalChinese Tag = Tag{language: zhHantIndex, locale: zhHantIndex}
Zulu Tag = Tag{language: zuIndex, locale: zuIndex}
)

167
vendor/golang.org/x/text/internal/language/compose.go generated vendored Normal file
View file

@ -0,0 +1,167 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"sort"
"strings"
)
// A Builder allows constructing a Tag from individual components.
// Its main user is Compose in the top-level language package.
type Builder struct {
Tag Tag
private string // the x extension
variants []string
extensions []string
}
// Make returns a new Tag from the current settings.
func (b *Builder) Make() Tag {
t := b.Tag
if len(b.extensions) > 0 || len(b.variants) > 0 {
sort.Sort(sortVariants(b.variants))
sort.Strings(b.extensions)
if b.private != "" {
b.extensions = append(b.extensions, b.private)
}
n := maxCoreSize + tokenLen(b.variants...) + tokenLen(b.extensions...)
buf := make([]byte, n)
p := t.genCoreBytes(buf)
t.pVariant = byte(p)
p += appendTokens(buf[p:], b.variants...)
t.pExt = uint16(p)
p += appendTokens(buf[p:], b.extensions...)
t.str = string(buf[:p])
// We may not always need to remake the string, but when or when not
// to do so is rather tricky.
scan := makeScanner(buf[:p])
t, _ = parse(&scan, "")
return t
} else if b.private != "" {
t.str = b.private
t.RemakeString()
}
return t
}
// SetTag copies all the settings from a given Tag. Any previously set values
// are discarded.
func (b *Builder) SetTag(t Tag) {
b.Tag.LangID = t.LangID
b.Tag.RegionID = t.RegionID
b.Tag.ScriptID = t.ScriptID
// TODO: optimize
b.variants = b.variants[:0]
if variants := t.Variants(); variants != "" {
for _, vr := range strings.Split(variants[1:], "-") {
b.variants = append(b.variants, vr)
}
}
b.extensions, b.private = b.extensions[:0], ""
for _, e := range t.Extensions() {
b.AddExt(e)
}
}
// AddExt adds extension e to the tag. e must be a valid extension as returned
// by Tag.Extension. If the extension already exists, it will be discarded,
// except for a -u extension, where non-existing key-type pairs will added.
func (b *Builder) AddExt(e string) {
if e[0] == 'x' {
if b.private == "" {
b.private = e
}
return
}
for i, s := range b.extensions {
if s[0] == e[0] {
if e[0] == 'u' {
b.extensions[i] += e[1:]
}
return
}
}
b.extensions = append(b.extensions, e)
}
// SetExt sets the extension e to the tag. e must be a valid extension as
// returned by Tag.Extension. If the extension already exists, it will be
// overwritten, except for a -u extension, where the individual key-type pairs
// will be set.
func (b *Builder) SetExt(e string) {
if e[0] == 'x' {
b.private = e
return
}
for i, s := range b.extensions {
if s[0] == e[0] {
if e[0] == 'u' {
b.extensions[i] = e + s[1:]
} else {
b.extensions[i] = e
}
return
}
}
b.extensions = append(b.extensions, e)
}
// AddVariant adds any number of variants.
func (b *Builder) AddVariant(v ...string) {
for _, v := range v {
if v != "" {
b.variants = append(b.variants, v)
}
}
}
// ClearVariants removes any variants previously added, including those
// copied from a Tag in SetTag.
func (b *Builder) ClearVariants() {
b.variants = b.variants[:0]
}
// ClearExtensions removes any extensions previously added, including those
// copied from a Tag in SetTag.
func (b *Builder) ClearExtensions() {
b.private = ""
b.extensions = b.extensions[:0]
}
func tokenLen(token ...string) (n int) {
for _, t := range token {
n += len(t) + 1
}
return
}
func appendTokens(b []byte, token ...string) int {
p := 0
for _, t := range token {
b[p] = '-'
copy(b[p+1:], t)
p += 1 + len(t)
}
return p
}
type sortVariants []string
func (s sortVariants) Len() int {
return len(s)
}
func (s sortVariants) Swap(i, j int) {
s[j], s[i] = s[i], s[j]
}
func (s sortVariants) Less(i, j int) bool {
return variantIndex[s[i]] < variantIndex[s[j]]
}

28
vendor/golang.org/x/text/internal/language/coverage.go generated vendored Normal file
View file

@ -0,0 +1,28 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
// BaseLanguages returns the list of all supported base languages. It generates
// the list by traversing the internal structures.
func BaseLanguages() []Language {
base := make([]Language, 0, NumLanguages)
for i := 0; i < langNoIndexOffset; i++ {
// We included "und" already for the value 0.
if i != nonCanonicalUnd {
base = append(base, Language(i))
}
}
i := langNoIndexOffset
for _, v := range langNoIndex {
for k := 0; k < 8; k++ {
if v&1 == 1 {
base = append(base, Language(i))
}
v >>= 1
i++
}
}
return base
}

1520
vendor/golang.org/x/text/internal/language/gen.go generated vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,20 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This file contains code common to the maketables.go and the package code.
// AliasType is the type of an alias in AliasMap.
type AliasType int8
const (
Deprecated AliasType = iota
Macro
Legacy
AliasTypeUnknown AliasType = -1
)

596
vendor/golang.org/x/text/internal/language/language.go generated vendored Normal file
View file

@ -0,0 +1,596 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_common.go -output tables.go
package language // import "golang.org/x/text/internal/language"
// TODO: Remove above NOTE after:
// - verifying that tables are dropped correctly (most notably matcher tables).
import (
"errors"
"fmt"
"strings"
)
const (
// maxCoreSize is the maximum size of a BCP 47 tag without variants and
// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.
maxCoreSize = 12
// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
// is large enough to hold at least 99% of the BCP 47 tags.
max99thPercentileSize = 32
// maxSimpleUExtensionSize is the maximum size of a -u extension with one
// key-type pair. Equals len("-u-") + key (2) + dash + max value (8).
maxSimpleUExtensionSize = 14
)
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed. The zero value of Tag is Und.
type Tag struct {
// TODO: the following fields have the form TagTypeID. This name is chosen
// to allow refactoring the public package without conflicting with its
// Base, Script, and Region methods. Once the transition is fully completed
// the ID can be stripped from the name.
LangID Language
RegionID Region
// TODO: we will soon run out of positions for ScriptID. Idea: instead of
// storing lang, region, and ScriptID codes, store only the compact index and
// have a lookup table from this code to its expansion. This greatly speeds
// up table lookup, speed up common variant cases.
// This will also immediately free up 3 extra bytes. Also, the pVariant
// field can now be moved to the lookup table, as the compact index uniquely
// determines the offset of a possible variant.
ScriptID Script
pVariant byte // offset in str, includes preceding '-'
pExt uint16 // offset of first extension, includes preceding '-'
// str is the string representation of the Tag. It will only be used if the
// tag has variants or extensions.
str string
}
// Make is a convenience wrapper for Parse that omits the error.
// In case of an error, a sensible default is returned.
func Make(s string) Tag {
t, _ := Parse(s)
return t
}
// Raw returns the raw base language, script and region, without making an
// attempt to infer their values.
// TODO: consider removing
func (t Tag) Raw() (b Language, s Script, r Region) {
return t.LangID, t.ScriptID, t.RegionID
}
// equalTags compares language, script and region subtags only.
func (t Tag) equalTags(a Tag) bool {
return t.LangID == a.LangID && t.ScriptID == a.ScriptID && t.RegionID == a.RegionID
}
// IsRoot returns true if t is equal to language "und".
func (t Tag) IsRoot() bool {
if int(t.pVariant) < len(t.str) {
return false
}
return t.equalTags(Und)
}
// IsPrivateUse reports whether the Tag consists solely of an IsPrivateUse use
// tag.
func (t Tag) IsPrivateUse() bool {
return t.str != "" && t.pVariant == 0
}
// RemakeString is used to update t.str in case lang, script or region changed.
// It is assumed that pExt and pVariant still point to the start of the
// respective parts.
func (t *Tag) RemakeString() {
if t.str == "" {
return
}
extra := t.str[t.pVariant:]
if t.pVariant > 0 {
extra = extra[1:]
}
if t.equalTags(Und) && strings.HasPrefix(extra, "x-") {
t.str = extra
t.pVariant = 0
t.pExt = 0
return
}
var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
b := buf[:t.genCoreBytes(buf[:])]
if extra != "" {
diff := len(b) - int(t.pVariant)
b = append(b, '-')
b = append(b, extra...)
t.pVariant = uint8(int(t.pVariant) + diff)
t.pExt = uint16(int(t.pExt) + diff)
} else {
t.pVariant = uint8(len(b))
t.pExt = uint16(len(b))
}
t.str = string(b)
}
// genCoreBytes writes a string for the base languages, script and region tags
// to the given buffer and returns the number of bytes written. It will never
// write more than maxCoreSize bytes.
func (t *Tag) genCoreBytes(buf []byte) int {
n := t.LangID.StringToBuf(buf[:])
if t.ScriptID != 0 {
n += copy(buf[n:], "-")
n += copy(buf[n:], t.ScriptID.String())
}
if t.RegionID != 0 {
n += copy(buf[n:], "-")
n += copy(buf[n:], t.RegionID.String())
}
return n
}
// String returns the canonical string representation of the language tag.
func (t Tag) String() string {
if t.str != "" {
return t.str
}
if t.ScriptID == 0 && t.RegionID == 0 {
return t.LangID.String()
}
buf := [maxCoreSize]byte{}
return string(buf[:t.genCoreBytes(buf[:])])
}
// MarshalText implements encoding.TextMarshaler.
func (t Tag) MarshalText() (text []byte, err error) {
if t.str != "" {
text = append(text, t.str...)
} else if t.ScriptID == 0 && t.RegionID == 0 {
text = append(text, t.LangID.String()...)
} else {
buf := [maxCoreSize]byte{}
text = buf[:t.genCoreBytes(buf[:])]
}
return text, nil
}
// UnmarshalText implements encoding.TextUnmarshaler.
func (t *Tag) UnmarshalText(text []byte) error {
tag, err := Parse(string(text))
*t = tag
return err
}
// Variants returns the part of the tag holding all variants or the empty string
// if there are no variants defined.
func (t Tag) Variants() string {
if t.pVariant == 0 {
return ""
}
return t.str[t.pVariant:t.pExt]
}
// VariantOrPrivateUseTags returns variants or private use tags.
func (t Tag) VariantOrPrivateUseTags() string {
if t.pExt > 0 {
return t.str[t.pVariant:t.pExt]
}
return t.str[t.pVariant:]
}
// HasString reports whether this tag defines more than just the raw
// components.
func (t Tag) HasString() bool {
return t.str != ""
}
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
func (t Tag) Parent() Tag {
if t.str != "" {
// Strip the variants and extensions.
b, s, r := t.Raw()
t = Tag{LangID: b, ScriptID: s, RegionID: r}
if t.RegionID == 0 && t.ScriptID != 0 && t.LangID != 0 {
base, _ := addTags(Tag{LangID: t.LangID})
if base.ScriptID == t.ScriptID {
return Tag{LangID: t.LangID}
}
}
return t
}
if t.LangID != 0 {
if t.RegionID != 0 {
maxScript := t.ScriptID
if maxScript == 0 {
max, _ := addTags(t)
maxScript = max.ScriptID
}
for i := range parents {
if Language(parents[i].lang) == t.LangID && Script(parents[i].maxScript) == maxScript {
for _, r := range parents[i].fromRegion {
if Region(r) == t.RegionID {
return Tag{
LangID: t.LangID,
ScriptID: Script(parents[i].script),
RegionID: Region(parents[i].toRegion),
}
}
}
}
}
// Strip the script if it is the default one.
base, _ := addTags(Tag{LangID: t.LangID})
if base.ScriptID != maxScript {
return Tag{LangID: t.LangID, ScriptID: maxScript}
}
return Tag{LangID: t.LangID}
} else if t.ScriptID != 0 {
// The parent for an base-script pair with a non-default script is
// "und" instead of the base language.
base, _ := addTags(Tag{LangID: t.LangID})
if base.ScriptID != t.ScriptID {
return Und
}
return Tag{LangID: t.LangID}
}
}
return Und
}
// ParseExtension parses s as an extension and returns it on success.
func ParseExtension(s string) (ext string, err error) {
scan := makeScannerString(s)
var end int
if n := len(scan.token); n != 1 {
return "", ErrSyntax
}
scan.toLower(0, len(scan.b))
end = parseExtension(&scan)
if end != len(s) {
return "", ErrSyntax
}
return string(scan.b), nil
}
// HasVariants reports whether t has variants.
func (t Tag) HasVariants() bool {
return uint16(t.pVariant) < t.pExt
}
// HasExtensions reports whether t has extensions.
func (t Tag) HasExtensions() bool {
return int(t.pExt) < len(t.str)
}
// Extension returns the extension of type x for tag t. It will return
// false for ok if t does not have the requested extension. The returned
// extension will be invalid in this case.
func (t Tag) Extension(x byte) (ext string, ok bool) {
for i := int(t.pExt); i < len(t.str)-1; {
var ext string
i, ext = getExtension(t.str, i)
if ext[0] == x {
return ext, true
}
}
return "", false
}
// Extensions returns all extensions of t.
func (t Tag) Extensions() []string {
e := []string{}
for i := int(t.pExt); i < len(t.str)-1; {
var ext string
i, ext = getExtension(t.str, i)
e = append(e, ext)
}
return e
}
// TypeForKey returns the type associated with the given key, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// TypeForKey will traverse the inheritance chain to get the correct value.
func (t Tag) TypeForKey(key string) string {
if start, end, _ := t.findTypeForKey(key); end != start {
return t.str[start:end]
}
return ""
}
var (
errPrivateUse = errors.New("cannot set a key on a private use tag")
errInvalidArguments = errors.New("invalid key or type")
)
// SetTypeForKey returns a new Tag with the key set to type, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// An empty value removes an existing pair with the same key.
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
if t.IsPrivateUse() {
return t, errPrivateUse
}
if len(key) != 2 {
return t, errInvalidArguments
}
// Remove the setting if value is "".
if value == "" {
start, end, _ := t.findTypeForKey(key)
if start != end {
// Remove key tag and leading '-'.
start -= 4
// Remove a possible empty extension.
if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' {
start -= 2
}
if start == int(t.pVariant) && end == len(t.str) {
t.str = ""
t.pVariant, t.pExt = 0, 0
} else {
t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
}
}
return t, nil
}
if len(value) < 3 || len(value) > 8 {
return t, errInvalidArguments
}
var (
buf [maxCoreSize + maxSimpleUExtensionSize]byte
uStart int // start of the -u extension.
)
// Generate the tag string if needed.
if t.str == "" {
uStart = t.genCoreBytes(buf[:])
buf[uStart] = '-'
uStart++
}
// Create new key-type pair and parse it to verify.
b := buf[uStart:]
copy(b, "u-")
copy(b[2:], key)
b[4] = '-'
b = b[:5+copy(b[5:], value)]
scan := makeScanner(b)
if parseExtensions(&scan); scan.err != nil {
return t, scan.err
}
// Assemble the replacement string.
if t.str == "" {
t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
t.str = string(buf[:uStart+len(b)])
} else {
s := t.str
start, end, hasExt := t.findTypeForKey(key)
if start == end {
if hasExt {
b = b[2:]
}
t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
} else {
t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
}
}
return t, nil
}
// findKeyAndType returns the start and end position for the type corresponding
// to key or the point at which to insert the key-value pair if the type
// wasn't found. The hasExt return value reports whether an -u extension was present.
// Note: the extensions are typically very small and are likely to contain
// only one key-type pair.
func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
p := int(t.pExt)
if len(key) != 2 || p == len(t.str) || p == 0 {
return p, p, false
}
s := t.str
// Find the correct extension.
for p++; s[p] != 'u'; p++ {
if s[p] > 'u' {
p--
return p, p, false
}
if p = nextExtension(s, p); p == len(s) {
return len(s), len(s), false
}
}
// Proceed to the hyphen following the extension name.
p++
// curKey is the key currently being processed.
curKey := ""
// Iterate over keys until we get the end of a section.
for {
// p points to the hyphen preceding the current token.
if p3 := p + 3; s[p3] == '-' {
// Found a key.
// Check whether we just processed the key that was requested.
if curKey == key {
return start, p, true
}
// Set to the next key and continue scanning type tokens.
curKey = s[p+1 : p3]
if curKey > key {
return p, p, true
}
// Start of the type token sequence.
start = p + 4
// A type is at least 3 characters long.
p += 7 // 4 + 3
} else {
// Attribute or type, which is at least 3 characters long.
p += 4
}
// p points past the third character of a type or attribute.
max := p + 5 // maximum length of token plus hyphen.
if len(s) < max {
max = len(s)
}
for ; p < max && s[p] != '-'; p++ {
}
// Bail if we have exhausted all tokens or if the next token starts
// a new extension.
if p == len(s) || s[p+2] == '-' {
if curKey == key {
return start, p, true
}
return p, p, true
}
}
}
// ParseBase parses a 2- or 3-letter ISO 639 code.
// It returns a ValueError if s is a well-formed but unknown language identifier
// or another error if another error occurred.
func ParseBase(s string) (Language, error) {
if n := len(s); n < 2 || 3 < n {
return 0, ErrSyntax
}
var buf [3]byte
return getLangID(buf[:copy(buf[:], s)])
}
// ParseScript parses a 4-letter ISO 15924 code.
// It returns a ValueError if s is a well-formed but unknown script identifier
// or another error if another error occurred.
func ParseScript(s string) (Script, error) {
if len(s) != 4 {
return 0, ErrSyntax
}
var buf [4]byte
return getScriptID(script, buf[:copy(buf[:], s)])
}
// EncodeM49 returns the Region for the given UN M.49 code.
// It returns an error if r is not a valid code.
func EncodeM49(r int) (Region, error) {
return getRegionM49(r)
}
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
// It returns a ValueError if s is a well-formed but unknown region identifier
// or another error if another error occurred.
func ParseRegion(s string) (Region, error) {
if n := len(s); n < 2 || 3 < n {
return 0, ErrSyntax
}
var buf [3]byte
return getRegionID(buf[:copy(buf[:], s)])
}
// IsCountry returns whether this region is a country or autonomous area. This
// includes non-standard definitions from CLDR.
func (r Region) IsCountry() bool {
if r == 0 || r.IsGroup() || r.IsPrivateUse() && r != _XK {
return false
}
return true
}
// IsGroup returns whether this region defines a collection of regions. This
// includes non-standard definitions from CLDR.
func (r Region) IsGroup() bool {
if r == 0 {
return false
}
return int(regionInclusion[r]) < len(regionContainment)
}
// Contains returns whether Region c is contained by Region r. It returns true
// if c == r.
func (r Region) Contains(c Region) bool {
if r == c {
return true
}
g := regionInclusion[r]
if g >= nRegionGroups {
return false
}
m := regionContainment[g]
d := regionInclusion[c]
b := regionInclusionBits[d]
// A contained country may belong to multiple disjoint groups. Matching any
// of these indicates containment. If the contained region is a group, it
// must strictly be a subset.
if d >= nRegionGroups {
return b&m != 0
}
return b&^m == 0
}
var errNoTLD = errors.New("language: region is not a valid ccTLD")
// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
// In all other cases it returns either the region itself or an error.
//
// This method may return an error for a region for which there exists a
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
// region will already be canonicalized it was obtained from a Tag that was
// obtained using any of the default methods.
func (r Region) TLD() (Region, error) {
// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
// difference between ISO 3166-1 and IANA ccTLD.
if r == _GB {
r = _UK
}
if (r.typ() & ccTLD) == 0 {
return 0, errNoTLD
}
return r, nil
}
// Canonicalize returns the region or a possible replacement if the region is
// deprecated. It will not return a replacement for deprecated regions that
// are split into multiple regions.
func (r Region) Canonicalize() Region {
if cr := normRegion(r); cr != 0 {
return cr
}
return r
}
// Variant represents a registered variant of a language as defined by BCP 47.
type Variant struct {
ID uint8
str string
}
// ParseVariant parses and returns a Variant. An error is returned if s is not
// a valid variant.
func ParseVariant(s string) (Variant, error) {
s = strings.ToLower(s)
if id, ok := variantIndex[s]; ok {
return Variant{id, s}, nil
}
return Variant{}, NewValueError([]byte(s))
}
// String returns the string representation of the variant.
func (v Variant) String() string {
return v.str
}

412
vendor/golang.org/x/text/internal/language/lookup.go generated vendored Normal file
View file

@ -0,0 +1,412 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"bytes"
"fmt"
"sort"
"strconv"
"golang.org/x/text/internal/tag"
)
// findIndex tries to find the given tag in idx and returns a standardized error
// if it could not be found.
func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
if !tag.FixCase(form, key) {
return 0, ErrSyntax
}
i := idx.Index(key)
if i == -1 {
return 0, NewValueError(key)
}
return i, nil
}
func searchUint(imap []uint16, key uint16) int {
return sort.Search(len(imap), func(i int) bool {
return imap[i] >= key
})
}
type Language uint16
// getLangID returns the langID of s if s is a canonical subtag
// or langUnknown if s is not a canonical subtag.
func getLangID(s []byte) (Language, error) {
if len(s) == 2 {
return getLangISO2(s)
}
return getLangISO3(s)
}
// TODO language normalization as well as the AliasMaps could be moved to the
// higher level package, but it is a bit tricky to separate the generation.
func (id Language) Canonicalize() (Language, AliasType) {
return normLang(id)
}
// mapLang returns the mapped langID of id according to mapping m.
func normLang(id Language) (Language, AliasType) {
k := sort.Search(len(AliasMap), func(i int) bool {
return AliasMap[i].From >= uint16(id)
})
if k < len(AliasMap) && AliasMap[k].From == uint16(id) {
return Language(AliasMap[k].To), AliasTypes[k]
}
return id, AliasTypeUnknown
}
// getLangISO2 returns the langID for the given 2-letter ISO language code
// or unknownLang if this does not exist.
func getLangISO2(s []byte) (Language, error) {
if !tag.FixCase("zz", s) {
return 0, ErrSyntax
}
if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {
return Language(i), nil
}
return 0, NewValueError(s)
}
const base = 'z' - 'a' + 1
func strToInt(s []byte) uint {
v := uint(0)
for i := 0; i < len(s); i++ {
v *= base
v += uint(s[i] - 'a')
}
return v
}
// converts the given integer to the original ASCII string passed to strToInt.
// len(s) must match the number of characters obtained.
func intToStr(v uint, s []byte) {
for i := len(s) - 1; i >= 0; i-- {
s[i] = byte(v%base) + 'a'
v /= base
}
}
// getLangISO3 returns the langID for the given 3-letter ISO language code
// or unknownLang if this does not exist.
func getLangISO3(s []byte) (Language, error) {
if tag.FixCase("und", s) {
// first try to match canonical 3-letter entries
for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) {
if e := lang.Elem(i); e[3] == 0 && e[2] == s[2] {
// We treat "und" as special and always translate it to "unspecified".
// Note that ZZ and Zzzz are private use and are not treated as
// unspecified by default.
id := Language(i)
if id == nonCanonicalUnd {
return 0, nil
}
return id, nil
}
}
if i := altLangISO3.Index(s); i != -1 {
return Language(altLangIndex[altLangISO3.Elem(i)[3]]), nil
}
n := strToInt(s)
if langNoIndex[n/8]&(1<<(n%8)) != 0 {
return Language(n) + langNoIndexOffset, nil
}
// Check for non-canonical uses of ISO3.
for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) {
if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
return Language(i), nil
}
}
return 0, NewValueError(s)
}
return 0, ErrSyntax
}
// StringToBuf writes the string to b and returns the number of bytes
// written. cap(b) must be >= 3.
func (id Language) StringToBuf(b []byte) int {
if id >= langNoIndexOffset {
intToStr(uint(id)-langNoIndexOffset, b[:3])
return 3
} else if id == 0 {
return copy(b, "und")
}
l := lang[id<<2:]
if l[3] == 0 {
return copy(b, l[:3])
}
return copy(b, l[:2])
}
// String returns the BCP 47 representation of the langID.
// Use b as variable name, instead of id, to ensure the variable
// used is consistent with that of Base in which this type is embedded.
func (b Language) String() string {
if b == 0 {
return "und"
} else if b >= langNoIndexOffset {
b -= langNoIndexOffset
buf := [3]byte{}
intToStr(uint(b), buf[:])
return string(buf[:])
}
l := lang.Elem(int(b))
if l[3] == 0 {
return l[:3]
}
return l[:2]
}
// ISO3 returns the ISO 639-3 language code.
func (b Language) ISO3() string {
if b == 0 || b >= langNoIndexOffset {
return b.String()
}
l := lang.Elem(int(b))
if l[3] == 0 {
return l[:3]
} else if l[2] == 0 {
return altLangISO3.Elem(int(l[3]))[:3]
}
// This allocation will only happen for 3-letter ISO codes
// that are non-canonical BCP 47 language identifiers.
return l[0:1] + l[2:4]
}
// IsPrivateUse reports whether this language code is reserved for private use.
func (b Language) IsPrivateUse() bool {
return langPrivateStart <= b && b <= langPrivateEnd
}
// SuppressScript returns the script marked as SuppressScript in the IANA
// language tag repository, or 0 if there is no such script.
func (b Language) SuppressScript() Script {
if b < langNoIndexOffset {
return Script(suppressScript[b])
}
return 0
}
type Region uint16
// getRegionID returns the region id for s if s is a valid 2-letter region code
// or unknownRegion.
func getRegionID(s []byte) (Region, error) {
if len(s) == 3 {
if isAlpha(s[0]) {
return getRegionISO3(s)
}
if i, err := strconv.ParseUint(string(s), 10, 10); err == nil {
return getRegionM49(int(i))
}
}
return getRegionISO2(s)
}
// getRegionISO2 returns the regionID for the given 2-letter ISO country code
// or unknownRegion if this does not exist.
func getRegionISO2(s []byte) (Region, error) {
i, err := findIndex(regionISO, s, "ZZ")
if err != nil {
return 0, err
}
return Region(i) + isoRegionOffset, nil
}
// getRegionISO3 returns the regionID for the given 3-letter ISO country code
// or unknownRegion if this does not exist.
func getRegionISO3(s []byte) (Region, error) {
if tag.FixCase("ZZZ", s) {
for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) {
if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] {
return Region(i) + isoRegionOffset, nil
}
}
for i := 0; i < len(altRegionISO3); i += 3 {
if tag.Compare(altRegionISO3[i:i+3], s) == 0 {
return Region(altRegionIDs[i/3]), nil
}
}
return 0, NewValueError(s)
}
return 0, ErrSyntax
}
func getRegionM49(n int) (Region, error) {
if 0 < n && n <= 999 {
const (
searchBits = 7
regionBits = 9
regionMask = 1<<regionBits - 1
)
idx := n >> searchBits
buf := fromM49[m49Index[idx]:m49Index[idx+1]]
val := uint16(n) << regionBits // we rely on bits shifting out
i := sort.Search(len(buf), func(i int) bool {
return buf[i] >= val
})
if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {
return Region(r & regionMask), nil
}
}
var e ValueError
fmt.Fprint(bytes.NewBuffer([]byte(e.v[:])), n)
return 0, e
}
// normRegion returns a region if r is deprecated or 0 otherwise.
// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).
// TODO: consider mapping split up regions to new most populous one (like CLDR).
func normRegion(r Region) Region {
m := regionOldMap
k := sort.Search(len(m), func(i int) bool {
return m[i].From >= uint16(r)
})
if k < len(m) && m[k].From == uint16(r) {
return Region(m[k].To)
}
return 0
}
const (
iso3166UserAssigned = 1 << iota
ccTLD
bcp47Region
)
func (r Region) typ() byte {
return regionTypes[r]
}
// String returns the BCP 47 representation for the region.
// It returns "ZZ" for an unspecified region.
func (r Region) String() string {
if r < isoRegionOffset {
if r == 0 {
return "ZZ"
}
return fmt.Sprintf("%03d", r.M49())
}
r -= isoRegionOffset
return regionISO.Elem(int(r))[:2]
}
// ISO3 returns the 3-letter ISO code of r.
// Note that not all regions have a 3-letter ISO code.
// In such cases this method returns "ZZZ".
func (r Region) ISO3() string {
if r < isoRegionOffset {
return "ZZZ"
}
r -= isoRegionOffset
reg := regionISO.Elem(int(r))
switch reg[2] {
case 0:
return altRegionISO3[reg[3]:][:3]
case ' ':
return "ZZZ"
}
return reg[0:1] + reg[2:4]
}
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
// is not defined for r.
func (r Region) M49() int {
return int(m49[r])
}
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
// may include private-use tags that are assigned by CLDR and used in this
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
func (r Region) IsPrivateUse() bool {
return r.typ()&iso3166UserAssigned != 0
}
type Script uint8
// getScriptID returns the script id for string s. It assumes that s
// is of the format [A-Z][a-z]{3}.
func getScriptID(idx tag.Index, s []byte) (Script, error) {
i, err := findIndex(idx, s, "Zzzz")
return Script(i), err
}
// String returns the script code in title case.
// It returns "Zzzz" for an unspecified script.
func (s Script) String() string {
if s == 0 {
return "Zzzz"
}
return script.Elem(int(s))
}
// IsPrivateUse reports whether this script code is reserved for private use.
func (s Script) IsPrivateUse() bool {
return _Qaaa <= s && s <= _Qabx
}
const (
maxAltTaglen = len("en-US-POSIX")
maxLen = maxAltTaglen
)
var (
// grandfatheredMap holds a mapping from legacy and grandfathered tags to
// their base language or index to more elaborate tag.
grandfatheredMap = map[[maxLen]byte]int16{
[maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban
[maxLen]byte{'i', '-', 'a', 'm', 'i'}: _ami, // i-ami
[maxLen]byte{'i', '-', 'b', 'n', 'n'}: _bnn, // i-bnn
[maxLen]byte{'i', '-', 'h', 'a', 'k'}: _hak, // i-hak
[maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}: _tlh, // i-klingon
[maxLen]byte{'i', '-', 'l', 'u', 'x'}: _lb, // i-lux
[maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}: _nv, // i-navajo
[maxLen]byte{'i', '-', 'p', 'w', 'n'}: _pwn, // i-pwn
[maxLen]byte{'i', '-', 't', 'a', 'o'}: _tao, // i-tao
[maxLen]byte{'i', '-', 't', 'a', 'y'}: _tay, // i-tay
[maxLen]byte{'i', '-', 't', 's', 'u'}: _tsu, // i-tsu
[maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}: _nb, // no-bok
[maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}: _nn, // no-nyn
[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}: _sfb, // sgn-BE-FR
[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}: _vgt, // sgn-BE-NL
[maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}: _sgg, // sgn-CH-DE
[maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}: _cmn, // zh-guoyu
[maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}: _hak, // zh-hakka
[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan
[maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}: _hsn, // zh-xiang
// Grandfathered tags with no modern replacement will be converted as
// follows:
[maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish
[maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}: -2, // en-GB-oed
[maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}: -3, // i-default
[maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}: -4, // i-enochian
[maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}: -5, // i-mingo
[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}: -6, // zh-min
// CLDR-specific tag.
[maxLen]byte{'r', 'o', 'o', 't'}: 0, // root
[maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX"
}
altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102}
altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix"
)
func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {
if v, ok := grandfatheredMap[s]; ok {
if v < 0 {
return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true
}
t.LangID = Language(v)
return t, true
}
return t, false
}

226
vendor/golang.org/x/text/internal/language/match.go generated vendored Normal file
View file

@ -0,0 +1,226 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import "errors"
type scriptRegionFlags uint8
const (
isList = 1 << iota
scriptInFrom
regionInFrom
)
func (t *Tag) setUndefinedLang(id Language) {
if t.LangID == 0 {
t.LangID = id
}
}
func (t *Tag) setUndefinedScript(id Script) {
if t.ScriptID == 0 {
t.ScriptID = id
}
}
func (t *Tag) setUndefinedRegion(id Region) {
if t.RegionID == 0 || t.RegionID.Contains(id) {
t.RegionID = id
}
}
// ErrMissingLikelyTagsData indicates no information was available
// to compute likely values of missing tags.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
// addLikelySubtags sets subtags to their most likely value, given the locale.
// In most cases this means setting fields for unknown values, but in some
// cases it may alter a value. It returns an ErrMissingLikelyTagsData error
// if the given locale cannot be expanded.
func (t Tag) addLikelySubtags() (Tag, error) {
id, err := addTags(t)
if err != nil {
return t, err
} else if id.equalTags(t) {
return t, nil
}
id.RemakeString()
return id, nil
}
// specializeRegion attempts to specialize a group region.
func specializeRegion(t *Tag) bool {
if i := regionInclusion[t.RegionID]; i < nRegionGroups {
x := likelyRegionGroup[i]
if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
t.RegionID = Region(x.region)
}
return true
}
return false
}
// Maximize returns a new tag with missing tags filled in.
func (t Tag) Maximize() (Tag, error) {
return addTags(t)
}
func addTags(t Tag) (Tag, error) {
// We leave private use identifiers alone.
if t.IsPrivateUse() {
return t, nil
}
if t.ScriptID != 0 && t.RegionID != 0 {
if t.LangID != 0 {
// already fully specified
specializeRegion(&t)
return t, nil
}
// Search matches for und-script-region. Note that for these cases
// region will never be a group so there is no need to check for this.
list := likelyRegion[t.RegionID : t.RegionID+1]
if x := list[0]; x.flags&isList != 0 {
list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
}
for _, x := range list {
// Deviating from the spec. See match_test.go for details.
if Script(x.script) == t.ScriptID {
t.setUndefinedLang(Language(x.lang))
return t, nil
}
}
}
if t.LangID != 0 {
// Search matches for lang-script and lang-region, where lang != und.
if t.LangID < langNoIndexOffset {
x := likelyLang[t.LangID]
if x.flags&isList != 0 {
list := likelyLangList[x.region : x.region+uint16(x.script)]
if t.ScriptID != 0 {
for _, x := range list {
if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
t.setUndefinedRegion(Region(x.region))
return t, nil
}
}
} else if t.RegionID != 0 {
count := 0
goodScript := true
tt := t
for _, x := range list {
// We visit all entries for which the script was not
// defined, including the ones where the region was not
// defined. This allows for proper disambiguation within
// regions.
if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
tt.RegionID = Region(x.region)
tt.setUndefinedScript(Script(x.script))
goodScript = goodScript && tt.ScriptID == Script(x.script)
count++
}
}
if count == 1 {
return tt, nil
}
// Even if we fail to find a unique Region, we might have
// an unambiguous script.
if goodScript {
t.ScriptID = tt.ScriptID
}
}
}
}
} else {
// Search matches for und-script.
if t.ScriptID != 0 {
x := likelyScript[t.ScriptID]
if x.region != 0 {
t.setUndefinedRegion(Region(x.region))
t.setUndefinedLang(Language(x.lang))
return t, nil
}
}
// Search matches for und-region. If und-script-region exists, it would
// have been found earlier.
if t.RegionID != 0 {
if i := regionInclusion[t.RegionID]; i < nRegionGroups {
x := likelyRegionGroup[i]
if x.region != 0 {
t.setUndefinedLang(Language(x.lang))
t.setUndefinedScript(Script(x.script))
t.RegionID = Region(x.region)
}
} else {
x := likelyRegion[t.RegionID]
if x.flags&isList != 0 {
x = likelyRegionList[x.lang]
}
if x.script != 0 && x.flags != scriptInFrom {
t.setUndefinedLang(Language(x.lang))
t.setUndefinedScript(Script(x.script))
return t, nil
}
}
}
}
// Search matches for lang.
if t.LangID < langNoIndexOffset {
x := likelyLang[t.LangID]
if x.flags&isList != 0 {
x = likelyLangList[x.region]
}
if x.region != 0 {
t.setUndefinedScript(Script(x.script))
t.setUndefinedRegion(Region(x.region))
}
specializeRegion(&t)
if t.LangID == 0 {
t.LangID = _en // default language
}
return t, nil
}
return t, ErrMissingLikelyTagsData
}
func (t *Tag) setTagsFrom(id Tag) {
t.LangID = id.LangID
t.ScriptID = id.ScriptID
t.RegionID = id.RegionID
}
// minimize removes the region or script subtags from t such that
// t.addLikelySubtags() == t.minimize().addLikelySubtags().
func (t Tag) minimize() (Tag, error) {
t, err := minimizeTags(t)
if err != nil {
return t, err
}
t.RemakeString()
return t, nil
}
// minimizeTags mimics the behavior of the ICU 51 C implementation.
func minimizeTags(t Tag) (Tag, error) {
if t.equalTags(Und) {
return t, nil
}
max, err := addTags(t)
if err != nil {
return t, err
}
for _, id := range [...]Tag{
{LangID: t.LangID},
{LangID: t.LangID, RegionID: t.RegionID},
{LangID: t.LangID, ScriptID: t.ScriptID},
} {
if x, err := addTags(id); err == nil && max.equalTags(x) {
t.setTagsFrom(id)
break
}
}
return t, nil
}

594
vendor/golang.org/x/text/internal/language/parse.go generated vendored Normal file
View file

@ -0,0 +1,594 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"bytes"
"errors"
"fmt"
"sort"
"golang.org/x/text/internal/tag"
)
// isAlpha returns true if the byte is not a digit.
// b must be an ASCII letter or digit.
func isAlpha(b byte) bool {
return b > '9'
}
// isAlphaNum returns true if the string contains only ASCII letters or digits.
func isAlphaNum(s []byte) bool {
for _, c := range s {
if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
return false
}
}
return true
}
// ErrSyntax is returned by any of the parsing functions when the
// input is not well-formed, according to BCP 47.
// TODO: return the position at which the syntax error occurred?
var ErrSyntax = errors.New("language: tag is not well-formed")
// ErrDuplicateKey is returned when a tag contains the same key twice with
// different values in the -u section.
var ErrDuplicateKey = errors.New("language: different values for same key in -u extension")
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError struct {
v [8]byte
}
// NewValueError creates a new ValueError.
func NewValueError(tag []byte) ValueError {
var e ValueError
copy(e.v[:], tag)
return e
}
func (e ValueError) tag() []byte {
n := bytes.IndexByte(e.v[:], 0)
if n == -1 {
n = 8
}
return e.v[:n]
}
// Error implements the error interface.
func (e ValueError) Error() string {
return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
}
// Subtag returns the subtag for which the error occurred.
func (e ValueError) Subtag() string {
return string(e.tag())
}
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
type scanner struct {
b []byte
bytes [max99thPercentileSize]byte
token []byte
start int // start position of the current token
end int // end position of the current token
next int // next point for scan
err error
done bool
}
func makeScannerString(s string) scanner {
scan := scanner{}
if len(s) <= len(scan.bytes) {
scan.b = scan.bytes[:copy(scan.bytes[:], s)]
} else {
scan.b = []byte(s)
}
scan.init()
return scan
}
// makeScanner returns a scanner using b as the input buffer.
// b is not copied and may be modified by the scanner routines.
func makeScanner(b []byte) scanner {
scan := scanner{b: b}
scan.init()
return scan
}
func (s *scanner) init() {
for i, c := range s.b {
if c == '_' {
s.b[i] = '-'
}
}
s.scan()
}
// restToLower converts the string between start and end to lower case.
func (s *scanner) toLower(start, end int) {
for i := start; i < end; i++ {
c := s.b[i]
if 'A' <= c && c <= 'Z' {
s.b[i] += 'a' - 'A'
}
}
}
func (s *scanner) setError(e error) {
if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) {
s.err = e
}
}
// resizeRange shrinks or grows the array at position oldStart such that
// a new string of size newSize can fit between oldStart and oldEnd.
// Sets the scan point to after the resized range.
func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
s.start = oldStart
if end := oldStart + newSize; end != oldEnd {
diff := end - oldEnd
if end < cap(s.b) {
b := make([]byte, len(s.b)+diff)
copy(b, s.b[:oldStart])
copy(b[end:], s.b[oldEnd:])
s.b = b
} else {
s.b = append(s.b[end:], s.b[oldEnd:]...)
}
s.next = end + (s.next - s.end)
s.end = end
}
}
// replace replaces the current token with repl.
func (s *scanner) replace(repl string) {
s.resizeRange(s.start, s.end, len(repl))
copy(s.b[s.start:], repl)
}
// gobble removes the current token from the input.
// Caller must call scan after calling gobble.
func (s *scanner) gobble(e error) {
s.setError(e)
if s.start == 0 {
s.b = s.b[:+copy(s.b, s.b[s.next:])]
s.end = 0
} else {
s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
s.end = s.start - 1
}
s.next = s.start
}
// deleteRange removes the given range from s.b before the current token.
func (s *scanner) deleteRange(start, end int) {
s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
diff := end - start
s.next -= diff
s.start -= diff
s.end -= diff
}
// scan parses the next token of a BCP 47 string. Tokens that are larger
// than 8 characters or include non-alphanumeric characters result in an error
// and are gobbled and removed from the output.
// It returns the end position of the last token consumed.
func (s *scanner) scan() (end int) {
end = s.end
s.token = nil
for s.start = s.next; s.next < len(s.b); {
i := bytes.IndexByte(s.b[s.next:], '-')
if i == -1 {
s.end = len(s.b)
s.next = len(s.b)
i = s.end - s.start
} else {
s.end = s.next + i
s.next = s.end + 1
}
token := s.b[s.start:s.end]
if i < 1 || i > 8 || !isAlphaNum(token) {
s.gobble(ErrSyntax)
continue
}
s.token = token
return end
}
if n := len(s.b); n > 0 && s.b[n-1] == '-' {
s.setError(ErrSyntax)
s.b = s.b[:len(s.b)-1]
}
s.done = true
return end
}
// acceptMinSize parses multiple tokens of the given size or greater.
// It returns the end position of the last token consumed.
func (s *scanner) acceptMinSize(min int) (end int) {
end = s.end
s.scan()
for ; len(s.token) >= min; s.scan() {
end = s.end
}
return end
}
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
// failed it returns an error and any part of the tag that could be parsed.
// If parsing succeeded but an unknown value was found, it returns
// ValueError. The Tag returned in this case is just stripped of the unknown
// value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
func Parse(s string) (t Tag, err error) {
// TODO: consider supporting old-style locale key-value pairs.
if s == "" {
return Und, ErrSyntax
}
if len(s) <= maxAltTaglen {
b := [maxAltTaglen]byte{}
for i, c := range s {
// Generating invalid UTF-8 is okay as it won't match.
if 'A' <= c && c <= 'Z' {
c += 'a' - 'A'
} else if c == '_' {
c = '-'
}
b[i] = byte(c)
}
if t, ok := grandfathered(b); ok {
return t, nil
}
}
scan := makeScannerString(s)
return parse(&scan, s)
}
func parse(scan *scanner, s string) (t Tag, err error) {
t = Und
var end int
if n := len(scan.token); n <= 1 {
scan.toLower(0, len(scan.b))
if n == 0 || scan.token[0] != 'x' {
return t, ErrSyntax
}
end = parseExtensions(scan)
} else if n >= 4 {
return Und, ErrSyntax
} else { // the usual case
t, end = parseTag(scan)
if n := len(scan.token); n == 1 {
t.pExt = uint16(end)
end = parseExtensions(scan)
} else if end < len(scan.b) {
scan.setError(ErrSyntax)
scan.b = scan.b[:end]
}
}
if int(t.pVariant) < len(scan.b) {
if end < len(s) {
s = s[:end]
}
if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
t.str = s
} else {
t.str = string(scan.b)
}
} else {
t.pVariant, t.pExt = 0, 0
}
return t, scan.err
}
// parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed.
func parseTag(scan *scanner) (t Tag, end int) {
var e error
// TODO: set an error if an unknown lang, script or region is encountered.
t.LangID, e = getLangID(scan.token)
scan.setError(e)
scan.replace(t.LangID.String())
langStart := scan.start
end = scan.scan()
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
// to a tag of the form <extlang>.
lang, e := getLangID(scan.token)
if lang != 0 {
t.LangID = lang
copy(scan.b[langStart:], lang.String())
scan.b[langStart+3] = '-'
scan.start = langStart + 4
}
scan.gobble(e)
end = scan.scan()
}
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
t.ScriptID, e = getScriptID(script, scan.token)
if t.ScriptID == 0 {
scan.gobble(e)
}
end = scan.scan()
}
if n := len(scan.token); n >= 2 && n <= 3 {
t.RegionID, e = getRegionID(scan.token)
if t.RegionID == 0 {
scan.gobble(e)
} else {
scan.replace(t.RegionID.String())
}
end = scan.scan()
}
scan.toLower(scan.start, len(scan.b))
t.pVariant = byte(end)
end = parseVariants(scan, end, t)
t.pExt = uint16(end)
return t, end
}
var separator = []byte{'-'}
// parseVariants scans tokens as long as each token is a valid variant string.
// Duplicate variants are removed.
func parseVariants(scan *scanner, end int, t Tag) int {
start := scan.start
varIDBuf := [4]uint8{}
variantBuf := [4][]byte{}
varID := varIDBuf[:0]
variant := variantBuf[:0]
last := -1
needSort := false
for ; len(scan.token) >= 4; scan.scan() {
// TODO: measure the impact of needing this conversion and redesign
// the data structure if there is an issue.
v, ok := variantIndex[string(scan.token)]
if !ok {
// unknown variant
// TODO: allow user-defined variants?
scan.gobble(NewValueError(scan.token))
continue
}
varID = append(varID, v)
variant = append(variant, scan.token)
if !needSort {
if last < int(v) {
last = int(v)
} else {
needSort = true
// There is no legal combinations of more than 7 variants
// (and this is by no means a useful sequence).
const maxVariants = 8
if len(varID) > maxVariants {
break
}
}
}
end = scan.end
}
if needSort {
sort.Sort(variantsSort{varID, variant})
k, l := 0, -1
for i, v := range varID {
w := int(v)
if l == w {
// Remove duplicates.
continue
}
varID[k] = varID[i]
variant[k] = variant[i]
k++
l = w
}
if str := bytes.Join(variant[:k], separator); len(str) == 0 {
end = start - 1
} else {
scan.resizeRange(start, end, len(str))
copy(scan.b[scan.start:], str)
end = scan.end
}
}
return end
}
type variantsSort struct {
i []uint8
v [][]byte
}
func (s variantsSort) Len() int {
return len(s.i)
}
func (s variantsSort) Swap(i, j int) {
s.i[i], s.i[j] = s.i[j], s.i[i]
s.v[i], s.v[j] = s.v[j], s.v[i]
}
func (s variantsSort) Less(i, j int) bool {
return s.i[i] < s.i[j]
}
type bytesSort struct {
b [][]byte
n int // first n bytes to compare
}
func (b bytesSort) Len() int {
return len(b.b)
}
func (b bytesSort) Swap(i, j int) {
b.b[i], b.b[j] = b.b[j], b.b[i]
}
func (b bytesSort) Less(i, j int) bool {
for k := 0; k < b.n; k++ {
if b.b[i][k] == b.b[j][k] {
continue
}
return b.b[i][k] < b.b[j][k]
}
return false
}
// parseExtensions parses and normalizes the extensions in the buffer.
// It returns the last position of scan.b that is part of any extension.
// It also trims scan.b to remove excess parts accordingly.
func parseExtensions(scan *scanner) int {
start := scan.start
exts := [][]byte{}
private := []byte{}
end := scan.end
for len(scan.token) == 1 {
extStart := scan.start
ext := scan.token[0]
end = parseExtension(scan)
extension := scan.b[extStart:end]
if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
scan.setError(ErrSyntax)
end = extStart
continue
} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
scan.b = scan.b[:end]
return end
} else if ext == 'x' {
private = extension
break
}
exts = append(exts, extension)
}
sort.Sort(bytesSort{exts, 1})
if len(private) > 0 {
exts = append(exts, private)
}
scan.b = scan.b[:start]
if len(exts) > 0 {
scan.b = append(scan.b, bytes.Join(exts, separator)...)
} else if start > 0 {
// Strip trailing '-'.
scan.b = scan.b[:start-1]
}
return end
}
// parseExtension parses a single extension and returns the position of
// the extension end.
func parseExtension(scan *scanner) int {
start, end := scan.start, scan.end
switch scan.token[0] {
case 'u':
attrStart := end
scan.scan()
for last := []byte{}; len(scan.token) > 2; scan.scan() {
if bytes.Compare(scan.token, last) != -1 {
// Attributes are unsorted. Start over from scratch.
p := attrStart + 1
scan.next = p
attrs := [][]byte{}
for scan.scan(); len(scan.token) > 2; scan.scan() {
attrs = append(attrs, scan.token)
end = scan.end
}
sort.Sort(bytesSort{attrs, 3})
copy(scan.b[p:], bytes.Join(attrs, separator))
break
}
last = scan.token
end = scan.end
}
var last, key []byte
for attrEnd := end; len(scan.token) == 2; last = key {
key = scan.token
keyEnd := scan.end
end = scan.acceptMinSize(3)
// TODO: check key value validity
if keyEnd == end || bytes.Compare(key, last) != 1 {
// We have an invalid key or the keys are not sorted.
// Start scanning keys from scratch and reorder.
p := attrEnd + 1
scan.next = p
keys := [][]byte{}
for scan.scan(); len(scan.token) == 2; {
keyStart, keyEnd := scan.start, scan.end
end = scan.acceptMinSize(3)
if keyEnd != end {
keys = append(keys, scan.b[keyStart:end])
} else {
scan.setError(ErrSyntax)
end = keyStart
}
}
sort.Stable(bytesSort{keys, 2})
if n := len(keys); n > 0 {
k := 0
for i := 1; i < n; i++ {
if !bytes.Equal(keys[k][:2], keys[i][:2]) {
k++
keys[k] = keys[i]
} else if !bytes.Equal(keys[k], keys[i]) {
scan.setError(ErrDuplicateKey)
}
}
keys = keys[:k+1]
}
reordered := bytes.Join(keys, separator)
if e := p + len(reordered); e < end {
scan.deleteRange(e, end)
end = e
}
copy(scan.b[p:], reordered)
break
}
}
case 't':
scan.scan()
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
_, end = parseTag(scan)
scan.toLower(start, end)
}
for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
end = scan.acceptMinSize(3)
}
case 'x':
end = scan.acceptMinSize(1)
default:
end = scan.acceptMinSize(2)
}
return end
}
// getExtension returns the name, body and end position of the extension.
func getExtension(s string, p int) (end int, ext string) {
if s[p] == '-' {
p++
}
if s[p] == 'x' {
return len(s), s[p:]
}
end = nextExtension(s, p)
return end, s[p:end]
}
// nextExtension finds the next extension within the string, searching
// for the -<char>- pattern from position p.
// In the fast majority of cases, language tags will have at most
// one extension and extensions tend to be small.
func nextExtension(s string, p int) int {
for n := len(s) - 3; p < n; {
if s[p] == '-' {
if s[p+2] == '-' {
return p
}
p += 3
} else {
p++
}
}
return len(s)
}

3431
vendor/golang.org/x/text/internal/language/tables.go generated vendored Normal file

File diff suppressed because it is too large Load diff

48
vendor/golang.org/x/text/internal/language/tags.go generated vendored Normal file
View file

@ -0,0 +1,48 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
// It simplifies safe initialization of Tag values.
func MustParse(s string) Tag {
t, err := Parse(s)
if err != nil {
panic(err)
}
return t
}
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
// It simplifies safe initialization of Base values.
func MustParseBase(s string) Language {
b, err := ParseBase(s)
if err != nil {
panic(err)
}
return b
}
// MustParseScript is like ParseScript, but panics if the given script cannot be
// parsed. It simplifies safe initialization of Script values.
func MustParseScript(s string) Script {
scr, err := ParseScript(s)
if err != nil {
panic(err)
}
return scr
}
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
// parsed. It simplifies safe initialization of Region values.
func MustParseRegion(s string) Region {
r, err := ParseRegion(s)
if err != nil {
panic(err)
}
return r
}
// Und is the root language.
var Und Tag

100
vendor/golang.org/x/text/internal/tag/tag.go generated vendored Normal file
View file

@ -0,0 +1,100 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package tag contains functionality handling tags and related data.
package tag // import "golang.org/x/text/internal/tag"
import "sort"
// An Index converts tags to a compact numeric value.
//
// All elements are of size 4. Tags may be up to 4 bytes long. Excess bytes can
// be used to store additional information about the tag.
type Index string
// Elem returns the element data at the given index.
func (s Index) Elem(x int) string {
return string(s[x*4 : x*4+4])
}
// Index reports the index of the given key or -1 if it could not be found.
// Only the first len(key) bytes from the start of the 4-byte entries will be
// considered for the search and the first match in Index will be returned.
func (s Index) Index(key []byte) int {
n := len(key)
// search the index of the first entry with an equal or higher value than
// key in s.
index := sort.Search(len(s)/4, func(i int) bool {
return cmp(s[i*4:i*4+n], key) != -1
})
i := index * 4
if cmp(s[i:i+len(key)], key) != 0 {
return -1
}
return index
}
// Next finds the next occurrence of key after index x, which must have been
// obtained from a call to Index using the same key. It returns x+1 or -1.
func (s Index) Next(key []byte, x int) int {
if x++; x*4 < len(s) && cmp(s[x*4:x*4+len(key)], key) == 0 {
return x
}
return -1
}
// cmp returns an integer comparing a and b lexicographically.
func cmp(a Index, b []byte) int {
n := len(a)
if len(b) < n {
n = len(b)
}
for i, c := range b[:n] {
switch {
case a[i] > c:
return 1
case a[i] < c:
return -1
}
}
switch {
case len(a) < len(b):
return -1
case len(a) > len(b):
return 1
}
return 0
}
// Compare returns an integer comparing a and b lexicographically.
func Compare(a string, b []byte) int {
return cmp(Index(a), b)
}
// FixCase reformats b to the same pattern of cases as form.
// If returns false if string b is malformed.
func FixCase(form string, b []byte) bool {
if len(form) != len(b) {
return false
}
for i, c := range b {
if form[i] <= 'Z' {
if c >= 'a' {
c -= 'z' - 'Z'
}
if c < 'A' || 'Z' < c {
return false
}
} else {
if c <= 'Z' {
c += 'z' - 'Z'
}
if c < 'a' || 'z' < c {
return false
}
}
b[i] = c
}
return true
}

58
vendor/golang.org/x/text/internal/triegen/compact.go generated vendored Normal file
View file

@ -0,0 +1,58 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package triegen
// This file defines Compacter and its implementations.
import "io"
// A Compacter generates an alternative, more space-efficient way to store a
// trie value block. A trie value block holds all possible values for the last
// byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block
// always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0).
type Compacter interface {
// Size returns whether the Compacter could encode the given block as well
// as its size in case it can. len(v) is always 64.
Size(v []uint64) (sz int, ok bool)
// Store stores the block using the Compacter's compression method.
// It returns a handle with which the block can be retrieved.
// len(v) is always 64.
Store(v []uint64) uint32
// Print writes the data structures associated to the given store to w.
Print(w io.Writer) error
// Handler returns the name of a function that gets called during trie
// lookup for blocks generated by the Compacter. The function should be of
// the form func (n uint32, b byte) uint64, where n is the index returned by
// the Compacter's Store method and b is the last byte of the UTF-8
// encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the
// block.
Handler() string
}
// simpleCompacter is the default Compacter used by builder. It implements a
// normal trie block.
type simpleCompacter builder
func (b *simpleCompacter) Size([]uint64) (sz int, ok bool) {
return blockSize * b.ValueSize, true
}
func (b *simpleCompacter) Store(v []uint64) uint32 {
h := uint32(len(b.ValueBlocks) - blockOffset)
b.ValueBlocks = append(b.ValueBlocks, v)
return h
}
func (b *simpleCompacter) Print(io.Writer) error {
// Structures are printed in print.go.
return nil
}
func (b *simpleCompacter) Handler() string {
panic("Handler should be special-cased for this Compacter")
}

251
vendor/golang.org/x/text/internal/triegen/print.go generated vendored Normal file
View file

@ -0,0 +1,251 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package triegen
import (
"bytes"
"fmt"
"io"
"strings"
"text/template"
)
// print writes all the data structures as well as the code necessary to use the
// trie to w.
func (b *builder) print(w io.Writer) error {
b.Stats.NValueEntries = len(b.ValueBlocks) * blockSize
b.Stats.NValueBytes = len(b.ValueBlocks) * blockSize * b.ValueSize
b.Stats.NIndexEntries = len(b.IndexBlocks) * blockSize
b.Stats.NIndexBytes = len(b.IndexBlocks) * blockSize * b.IndexSize
b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize
// If we only have one root trie, all starter blocks are at position 0 and
// we can access the arrays directly.
if len(b.Trie) == 1 {
// At this point we cannot refer to the generated tables directly.
b.ASCIIBlock = b.Name + "Values"
b.StarterBlock = b.Name + "Index"
} else {
// Otherwise we need to have explicit starter indexes in the trie
// structure.
b.ASCIIBlock = "t.ascii"
b.StarterBlock = "t.utf8Start"
}
b.SourceType = "[]byte"
if err := lookupGen.Execute(w, b); err != nil {
return err
}
b.SourceType = "string"
if err := lookupGen.Execute(w, b); err != nil {
return err
}
if err := trieGen.Execute(w, b); err != nil {
return err
}
for _, c := range b.Compactions {
if err := c.c.Print(w); err != nil {
return err
}
}
return nil
}
func printValues(n int, values []uint64) string {
w := &bytes.Buffer{}
boff := n * blockSize
fmt.Fprintf(w, "\t// Block %#x, offset %#x", n, boff)
var newline bool
for i, v := range values {
if i%6 == 0 {
newline = true
}
if v != 0 {
if newline {
fmt.Fprintf(w, "\n")
newline = false
}
fmt.Fprintf(w, "\t%#02x:%#04x, ", boff+i, v)
}
}
return w.String()
}
func printIndex(b *builder, nr int, n *node) string {
w := &bytes.Buffer{}
boff := nr * blockSize
fmt.Fprintf(w, "\t// Block %#x, offset %#x", nr, boff)
var newline bool
for i, c := range n.children {
if i%8 == 0 {
newline = true
}
if c != nil {
v := b.Compactions[c.index.compaction].Offset + uint32(c.index.index)
if v != 0 {
if newline {
fmt.Fprintf(w, "\n")
newline = false
}
fmt.Fprintf(w, "\t%#02x:%#02x, ", boff+i, v)
}
}
}
return w.String()
}
var (
trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{
"printValues": printValues,
"printIndex": printIndex,
"title": strings.Title,
"dec": func(x int) int { return x - 1 },
"psize": func(n int) string {
return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024)
},
}).Parse(trieTemplate))
lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate))
)
// TODO: consider the return type of lookup. It could be uint64, even if the
// internal value type is smaller. We will have to verify this with the
// performance of unicode/norm, which is very sensitive to such changes.
const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}}
// {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}.
type {{.Name}}Trie struct { {{if $multi}}
ascii []{{.ValueType}} // index for ASCII bytes
utf8Start []{{.IndexType}} // index for UTF-8 bytes >= 0xC0
{{end}}}
func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}}
h := {{.Name}}TrieHandles[i]
return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] }
}
type {{.Name}}TrieHandle struct {
ascii, multi {{.IndexType}}
}
// {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes
var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{
{{range .Trie}} { {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}}
{{end}}}{{else}}
return &{{.Name}}Trie{}
}
{{end}}
// lookupValue determines the type of block n and looks up the value for b.
func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} {
switch { {{range $i, $c := .Compactions}}
{{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}}
n -= {{$c.Offset}}{{end}}
return {{print $b.ValueType}}({{$c.Handler}}){{end}}
}
}
// {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes
// The third block is the zero block.
var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} {
{{range $i, $v := .ValueBlocks}}{{printValues $i $v}}
{{end}}}
// {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes
// Block 0 is the zero block.
var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} {
{{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}}
{{end}}}
`
// TODO: consider allowing zero-length strings after evaluating performance with
// unicode/norm.
const lookupTemplate = `
// lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) {
c0 := s[0]
switch {
case c0 < 0x80: // is ASCII
return {{.ASCIIBlock}}[c0], 1
case c0 < 0xC2:
return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
case c0 < 0xE0: // 2-byte UTF-8
if len(s) < 2 {
return 0, 0
}
i := {{.StarterBlock}}[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return 0, 1 // Illegal UTF-8: not a continuation byte.
}
return t.lookupValue(uint32(i), c1), 2
case c0 < 0xF0: // 3-byte UTF-8
if len(s) < 3 {
return 0, 0
}
i := {{.StarterBlock}}[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return 0, 1 // Illegal UTF-8: not a continuation byte.
}
o := uint32(i)<<6 + uint32(c1)
i = {{.Name}}Index[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return 0, 2 // Illegal UTF-8: not a continuation byte.
}
return t.lookupValue(uint32(i), c2), 3
case c0 < 0xF8: // 4-byte UTF-8
if len(s) < 4 {
return 0, 0
}
i := {{.StarterBlock}}[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return 0, 1 // Illegal UTF-8: not a continuation byte.
}
o := uint32(i)<<6 + uint32(c1)
i = {{.Name}}Index[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return 0, 2 // Illegal UTF-8: not a continuation byte.
}
o = uint32(i)<<6 + uint32(c2)
i = {{.Name}}Index[o]
c3 := s[3]
if c3 < 0x80 || 0xC0 <= c3 {
return 0, 3 // Illegal UTF-8: not a continuation byte.
}
return t.lookupValue(uint32(i), c3), 4
}
// Illegal rune
return 0, 1
}
// lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s.
// s must start with a full and valid UTF-8 encoded rune.
func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} {
c0 := s[0]
if c0 < 0x80 { // is ASCII
return {{.ASCIIBlock}}[c0]
}
i := {{.StarterBlock}}[c0]
if c0 < 0xE0 { // 2-byte UTF-8
return t.lookupValue(uint32(i), s[1])
}
i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])]
if c0 < 0xF0 { // 3-byte UTF-8
return t.lookupValue(uint32(i), s[2])
}
i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])]
if c0 < 0xF8 { // 4-byte UTF-8
return t.lookupValue(uint32(i), s[3])
}
return 0
}
`

494
vendor/golang.org/x/text/internal/triegen/triegen.go generated vendored Normal file
View file

@ -0,0 +1,494 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package triegen implements a code generator for a trie for associating
// unsigned integer values with UTF-8 encoded runes.
//
// Many of the go.text packages use tries for storing per-rune information. A
// trie is especially useful if many of the runes have the same value. If this
// is the case, many blocks can be expected to be shared allowing for
// information on many runes to be stored in little space.
//
// As most of the lookups are done directly on []byte slices, the tries use the
// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to
// runes and contributes a little bit to better performance. It also naturally
// provides a fast path for ASCII.
//
// Space is also an issue. There are many code points defined in Unicode and as
// a result tables can get quite large. So every byte counts. The triegen
// package automatically chooses the smallest integer values to represent the
// tables. Compacters allow further compression of the trie by allowing for
// alternative representations of individual trie blocks.
//
// triegen allows generating multiple tries as a single structure. This is
// useful when, for example, one wants to generate tries for several languages
// that have a lot of values in common. Some existing libraries for
// internationalization store all per-language data as a dynamically loadable
// chunk. The go.text packages are designed with the assumption that the user
// typically wants to compile in support for all supported languages, in line
// with the approach common to Go to create a single standalone binary. The
// multi-root trie approach can give significant storage savings in this
// scenario.
//
// triegen generates both tables and code. The code is optimized to use the
// automatically chosen data types. The following code is generated for a Trie
// or multiple Tries named "foo":
// - type fooTrie
// The trie type.
//
// - func newFooTrie(x int) *fooTrie
// Trie constructor, where x is the index of the trie passed to Gen.
//
// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int)
// The lookup method, where uintX is automatically chosen.
//
// - func lookupString, lookupUnsafe and lookupStringUnsafe
// Variants of the above.
//
// - var fooValues and fooIndex and any tables generated by Compacters.
// The core trie data.
//
// - var fooTrieHandles
// Indexes of starter blocks in case of multiple trie roots.
//
// It is recommended that users test the generated trie by checking the returned
// value for every rune. Such exhaustive tests are possible as the number of
// runes in Unicode is limited.
package triegen // import "golang.org/x/text/internal/triegen"
// TODO: Arguably, the internally optimized data types would not have to be
// exposed in the generated API. We could also investigate not generating the
// code, but using it through a package. We would have to investigate the impact
// on performance of making such change, though. For packages like unicode/norm,
// small changes like this could tank performance.
import (
"encoding/binary"
"fmt"
"hash/crc64"
"io"
"log"
"unicode/utf8"
)
// builder builds a set of tries for associating values with runes. The set of
// tries can share common index and value blocks.
type builder struct {
Name string
// ValueType is the type of the trie values looked up.
ValueType string
// ValueSize is the byte size of the ValueType.
ValueSize int
// IndexType is the type of trie index values used for all UTF-8 bytes of
// a rune except the last one.
IndexType string
// IndexSize is the byte size of the IndexType.
IndexSize int
// SourceType is used when generating the lookup functions. If the user
// requests StringSupport, all lookup functions will be generated for
// string input as well.
SourceType string
Trie []*Trie
IndexBlocks []*node
ValueBlocks [][]uint64
Compactions []compaction
Checksum uint64
ASCIIBlock string
StarterBlock string
indexBlockIdx map[uint64]int
valueBlockIdx map[uint64]nodeIndex
asciiBlockIdx map[uint64]int
// Stats are used to fill out the template.
Stats struct {
NValueEntries int
NValueBytes int
NIndexEntries int
NIndexBytes int
NHandleBytes int
}
err error
}
// A nodeIndex encodes the index of a node, which is defined by the compaction
// which stores it and an index within the compaction. For internal nodes, the
// compaction is always 0.
type nodeIndex struct {
compaction int
index int
}
// compaction keeps track of stats used for the compaction.
type compaction struct {
c Compacter
blocks []*node
maxHandle uint32
totalSize int
// Used by template-based generator and thus exported.
Cutoff uint32
Offset uint32
Handler string
}
func (b *builder) setError(err error) {
if b.err == nil {
b.err = err
}
}
// An Option can be passed to Gen.
type Option func(b *builder) error
// Compact configures the trie generator to use the given Compacter.
func Compact(c Compacter) Option {
return func(b *builder) error {
b.Compactions = append(b.Compactions, compaction{
c: c,
Handler: c.Handler() + "(n, b)"})
return nil
}
}
// Gen writes Go code for a shared trie lookup structure to w for the given
// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will
// return the *nameTrie for tries[x]. A value can be looked up by using one of
// the various lookup methods defined on nameTrie. It returns the table size of
// the generated trie.
func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) {
// The index contains two dummy blocks, followed by the zero block. The zero
// block is at offset 0x80, so that the offset for the zero block for
// continuation bytes is 0.
b := &builder{
Name: name,
Trie: tries,
IndexBlocks: []*node{{}, {}, {}},
Compactions: []compaction{{
Handler: name + "Values[n<<6+uint32(b)]",
}},
// The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero
// block.
indexBlockIdx: map[uint64]int{0: 0},
valueBlockIdx: map[uint64]nodeIndex{0: {}},
asciiBlockIdx: map[uint64]int{},
}
b.Compactions[0].c = (*simpleCompacter)(b)
for _, f := range opts {
if err := f(b); err != nil {
return 0, err
}
}
b.build()
if b.err != nil {
return 0, b.err
}
if err = b.print(w); err != nil {
return 0, err
}
return b.Size(), nil
}
// A Trie represents a single root node of a trie. A builder may build several
// overlapping tries at once.
type Trie struct {
root *node
hiddenTrie
}
// hiddenTrie contains values we want to be visible to the template generator,
// but hidden from the API documentation.
type hiddenTrie struct {
Name string
Checksum uint64
ASCIIIndex int
StarterIndex int
}
// NewTrie returns a new trie root.
func NewTrie(name string) *Trie {
return &Trie{
&node{
children: make([]*node, blockSize),
values: make([]uint64, utf8.RuneSelf),
},
hiddenTrie{Name: name},
}
}
// Gen is a convenience wrapper around the Gen func passing t as the only trie
// and uses the name passed to NewTrie. It returns the size of the generated
// tables.
func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) {
return Gen(w, t.Name, []*Trie{t}, opts...)
}
// node is a node of the intermediate trie structure.
type node struct {
// children holds this node's children. It is always of length 64.
// A child node may be nil.
children []*node
// values contains the values of this node. If it is non-nil, this node is
// either a root or leaf node:
// For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F].
// For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF].
values []uint64
index nodeIndex
}
// Insert associates value with the given rune. Insert will panic if a non-zero
// value is passed for an invalid rune.
func (t *Trie) Insert(r rune, value uint64) {
if value == 0 {
return
}
s := string(r)
if []rune(s)[0] != r && value != 0 {
// Note: The UCD tables will always assign what amounts to a zero value
// to a surrogate. Allowing a zero value for an illegal rune allows
// users to iterate over [0..MaxRune] without having to explicitly
// exclude surrogates, which would be tedious.
panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r))
}
if len(s) == 1 {
// It is a root node value (ASCII).
t.root.values[s[0]] = value
return
}
n := t.root
for ; len(s) > 1; s = s[1:] {
if n.children == nil {
n.children = make([]*node, blockSize)
}
p := s[0] % blockSize
c := n.children[p]
if c == nil {
c = &node{}
n.children[p] = c
}
if len(s) > 2 && c.values != nil {
log.Fatalf("triegen: insert(%U): found internal node with values", r)
}
n = c
}
if n.values == nil {
n.values = make([]uint64, blockSize)
}
if n.children != nil {
log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r)
}
n.values[s[0]-0x80] = value
}
// Size returns the number of bytes the generated trie will take to store. It
// needs to be exported as it is used in the templates.
func (b *builder) Size() int {
// Index blocks.
sz := len(b.IndexBlocks) * blockSize * b.IndexSize
// Skip the first compaction, which represents the normal value blocks, as
// its totalSize does not account for the ASCII blocks, which are managed
// separately.
sz += len(b.ValueBlocks) * blockSize * b.ValueSize
for _, c := range b.Compactions[1:] {
sz += c.totalSize
}
// TODO: this computation does not account for the fixed overhead of a using
// a compaction, either code or data. As for data, though, the typical
// overhead of data is in the order of bytes (2 bytes for cases). Further,
// the savings of using a compaction should anyway be substantial for it to
// be worth it.
// For multi-root tries, we also need to account for the handles.
if len(b.Trie) > 1 {
sz += 2 * b.IndexSize * len(b.Trie)
}
return sz
}
func (b *builder) build() {
// Compute the sizes of the values.
var vmax uint64
for _, t := range b.Trie {
vmax = maxValue(t.root, vmax)
}
b.ValueType, b.ValueSize = getIntType(vmax)
// Compute all block allocations.
// TODO: first compute the ASCII blocks for all tries and then the other
// nodes. ASCII blocks are more restricted in placement, as they require two
// blocks to be placed consecutively. Processing them first may improve
// sharing (at least one zero block can be expected to be saved.)
for _, t := range b.Trie {
b.Checksum += b.buildTrie(t)
}
// Compute the offsets for all the Compacters.
offset := uint32(0)
for i := range b.Compactions {
c := &b.Compactions[i]
c.Offset = offset
offset += c.maxHandle + 1
c.Cutoff = offset
}
// Compute the sizes of indexes.
// TODO: different byte positions could have different sizes. So far we have
// not found a case where this is beneficial.
imax := uint64(b.Compactions[len(b.Compactions)-1].Cutoff)
for _, ib := range b.IndexBlocks {
if x := uint64(ib.index.index); x > imax {
imax = x
}
}
b.IndexType, b.IndexSize = getIntType(imax)
}
func maxValue(n *node, max uint64) uint64 {
if n == nil {
return max
}
for _, c := range n.children {
max = maxValue(c, max)
}
for _, v := range n.values {
if max < v {
max = v
}
}
return max
}
func getIntType(v uint64) (string, int) {
switch {
case v < 1<<8:
return "uint8", 1
case v < 1<<16:
return "uint16", 2
case v < 1<<32:
return "uint32", 4
}
return "uint64", 8
}
const (
blockSize = 64
// Subtract two blocks to offset 0x80, the first continuation byte.
blockOffset = 2
// Subtract three blocks to offset 0xC0, the first non-ASCII starter.
rootBlockOffset = 3
)
var crcTable = crc64.MakeTable(crc64.ISO)
func (b *builder) buildTrie(t *Trie) uint64 {
n := t.root
// Get the ASCII offset. For the first trie, the ASCII block will be at
// position 0.
hasher := crc64.New(crcTable)
binary.Write(hasher, binary.BigEndian, n.values)
hash := hasher.Sum64()
v, ok := b.asciiBlockIdx[hash]
if !ok {
v = len(b.ValueBlocks)
b.asciiBlockIdx[hash] = v
b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:])
if v == 0 {
// Add the zero block at position 2 so that it will be assigned a
// zero reference in the lookup blocks.
// TODO: always do this? This would allow us to remove a check from
// the trie lookup, but at the expense of extra space. Analyze
// performance for unicode/norm.
b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize))
}
}
t.ASCIIIndex = v
// Compute remaining offsets.
t.Checksum = b.computeOffsets(n, true)
// We already subtracted the normal blockOffset from the index. Subtract the
// difference for starter bytes.
t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset)
return t.Checksum
}
func (b *builder) computeOffsets(n *node, root bool) uint64 {
// For the first trie, the root lookup block will be at position 3, which is
// the offset for UTF-8 non-ASCII starter bytes.
first := len(b.IndexBlocks) == rootBlockOffset
if first {
b.IndexBlocks = append(b.IndexBlocks, n)
}
// We special-case the cases where all values recursively are 0. This allows
// for the use of a zero block to which all such values can be directed.
hash := uint64(0)
if n.children != nil || n.values != nil {
hasher := crc64.New(crcTable)
for _, c := range n.children {
var v uint64
if c != nil {
v = b.computeOffsets(c, false)
}
binary.Write(hasher, binary.BigEndian, v)
}
binary.Write(hasher, binary.BigEndian, n.values)
hash = hasher.Sum64()
}
if first {
b.indexBlockIdx[hash] = rootBlockOffset - blockOffset
}
// Compacters don't apply to internal nodes.
if n.children != nil {
v, ok := b.indexBlockIdx[hash]
if !ok {
v = len(b.IndexBlocks) - blockOffset
b.IndexBlocks = append(b.IndexBlocks, n)
b.indexBlockIdx[hash] = v
}
n.index = nodeIndex{0, v}
} else {
h, ok := b.valueBlockIdx[hash]
if !ok {
bestI, bestSize := 0, blockSize*b.ValueSize
for i, c := range b.Compactions[1:] {
if sz, ok := c.c.Size(n.values); ok && bestSize > sz {
bestI, bestSize = i+1, sz
}
}
c := &b.Compactions[bestI]
c.totalSize += bestSize
v := c.c.Store(n.values)
if c.maxHandle < v {
c.maxHandle = v
}
h = nodeIndex{bestI, int(v)}
b.valueBlockIdx[hash] = h
}
n.index = h
}
return hash
}

371
vendor/golang.org/x/text/internal/ucd/ucd.go generated vendored Normal file
View file

@ -0,0 +1,371 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package ucd provides a parser for Unicode Character Database files, the
// format of which is defined in https://www.unicode.org/reports/tr44/. See
// https://www.unicode.org/Public/UCD/latest/ucd/ for example files.
//
// It currently does not support substitutions of missing fields.
package ucd // import "golang.org/x/text/internal/ucd"
import (
"bufio"
"errors"
"fmt"
"io"
"log"
"regexp"
"strconv"
"strings"
)
// UnicodeData.txt fields.
const (
CodePoint = iota
Name
GeneralCategory
CanonicalCombiningClass
BidiClass
DecompMapping
DecimalValue
DigitValue
NumericValue
BidiMirrored
Unicode1Name
ISOComment
SimpleUppercaseMapping
SimpleLowercaseMapping
SimpleTitlecaseMapping
)
// Parse calls f for each entry in the given reader of a UCD file. It will close
// the reader upon return. It will call log.Fatal if any error occurred.
//
// This implements the most common usage pattern of using Parser.
func Parse(r io.ReadCloser, f func(p *Parser)) {
defer r.Close()
p := New(r)
for p.Next() {
f(p)
}
if err := p.Err(); err != nil {
r.Close() // os.Exit will cause defers not to be called.
log.Fatal(err)
}
}
// An Option is used to configure a Parser.
type Option func(p *Parser)
func keepRanges(p *Parser) {
p.keepRanges = true
}
var (
// KeepRanges prevents the expansion of ranges. The raw ranges can be
// obtained by calling Range(0) on the parser.
KeepRanges Option = keepRanges
)
// The Part option register a handler for lines starting with a '@'. The text
// after a '@' is available as the first field. Comments are handled as usual.
func Part(f func(p *Parser)) Option {
return func(p *Parser) {
p.partHandler = f
}
}
// The CommentHandler option passes comments that are on a line by itself to
// a given handler.
func CommentHandler(f func(s string)) Option {
return func(p *Parser) {
p.commentHandler = f
}
}
// A Parser parses Unicode Character Database (UCD) files.
type Parser struct {
scanner *bufio.Scanner
keepRanges bool // Don't expand rune ranges in field 0.
err error
comment string
field []string
// parsedRange is needed in case Range(0) is called more than once for one
// field. In some cases this requires scanning ahead.
line int
parsedRange bool
rangeStart, rangeEnd rune
partHandler func(p *Parser)
commentHandler func(s string)
}
func (p *Parser) setError(err error, msg string) {
if p.err == nil && err != nil {
if msg == "" {
p.err = fmt.Errorf("ucd:line:%d: %v", p.line, err)
} else {
p.err = fmt.Errorf("ucd:line:%d:%s: %v", p.line, msg, err)
}
}
}
func (p *Parser) getField(i int) string {
if i >= len(p.field) {
return ""
}
return p.field[i]
}
// Err returns a non-nil error if any error occurred during parsing.
func (p *Parser) Err() error {
return p.err
}
// New returns a Parser for the given Reader.
func New(r io.Reader, o ...Option) *Parser {
p := &Parser{
scanner: bufio.NewScanner(r),
}
for _, f := range o {
f(p)
}
return p
}
// Next parses the next line in the file. It returns true if a line was parsed
// and false if it reached the end of the file.
func (p *Parser) Next() bool {
if !p.keepRanges && p.rangeStart < p.rangeEnd {
p.rangeStart++
return true
}
p.comment = ""
p.field = p.field[:0]
p.parsedRange = false
for p.scanner.Scan() && p.err == nil {
p.line++
s := p.scanner.Text()
if s == "" {
continue
}
if s[0] == '#' {
if p.commentHandler != nil {
p.commentHandler(strings.TrimSpace(s[1:]))
}
continue
}
// Parse line
if i := strings.IndexByte(s, '#'); i != -1 {
p.comment = strings.TrimSpace(s[i+1:])
s = s[:i]
}
if s[0] == '@' {
if p.partHandler != nil {
p.field = append(p.field, strings.TrimSpace(s[1:]))
p.partHandler(p)
p.field = p.field[:0]
}
p.comment = ""
continue
}
for {
i := strings.IndexByte(s, ';')
if i == -1 {
p.field = append(p.field, strings.TrimSpace(s))
break
}
p.field = append(p.field, strings.TrimSpace(s[:i]))
s = s[i+1:]
}
if !p.keepRanges {
p.rangeStart, p.rangeEnd = p.getRange(0)
}
return true
}
p.setError(p.scanner.Err(), "scanner failed")
return false
}
func parseRune(b string) (rune, error) {
if len(b) > 2 && b[0] == 'U' && b[1] == '+' {
b = b[2:]
}
x, err := strconv.ParseUint(b, 16, 32)
return rune(x), err
}
func (p *Parser) parseRune(s string) rune {
x, err := parseRune(s)
p.setError(err, "failed to parse rune")
return x
}
// Rune parses and returns field i as a rune.
func (p *Parser) Rune(i int) rune {
if i > 0 || p.keepRanges {
return p.parseRune(p.getField(i))
}
return p.rangeStart
}
// Runes interprets and returns field i as a sequence of runes.
func (p *Parser) Runes(i int) (runes []rune) {
add := func(s string) {
if s = strings.TrimSpace(s); len(s) > 0 {
runes = append(runes, p.parseRune(s))
}
}
for b := p.getField(i); ; {
i := strings.IndexByte(b, ' ')
if i == -1 {
add(b)
break
}
add(b[:i])
b = b[i+1:]
}
return
}
var (
errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>")
// reRange matches one line of a legacy rune range.
reRange = regexp.MustCompile("^([0-9A-F]*);<([^,]*), ([^>]*)>(.*)$")
)
// Range parses and returns field i as a rune range. A range is inclusive at
// both ends. If the field only has one rune, first and last will be identical.
// It supports the legacy format for ranges used in UnicodeData.txt.
func (p *Parser) Range(i int) (first, last rune) {
if !p.keepRanges {
return p.rangeStart, p.rangeStart
}
return p.getRange(i)
}
func (p *Parser) getRange(i int) (first, last rune) {
b := p.getField(i)
if k := strings.Index(b, ".."); k != -1 {
return p.parseRune(b[:k]), p.parseRune(b[k+2:])
}
// The first field may not be a rune, in which case we may ignore any error
// and set the range as 0..0.
x, err := parseRune(b)
if err != nil {
// Disable range parsing henceforth. This ensures that an error will be
// returned if the user subsequently will try to parse this field as
// a Rune.
p.keepRanges = true
}
// Special case for UnicodeData that was retained for backwards compatibility.
if i == 0 && len(p.field) > 1 && strings.HasSuffix(p.field[1], "First>") {
if p.parsedRange {
return p.rangeStart, p.rangeEnd
}
mf := reRange.FindStringSubmatch(p.scanner.Text())
p.line++
if mf == nil || !p.scanner.Scan() {
p.setError(errIncorrectLegacyRange, "")
return x, x
}
// Using Bytes would be more efficient here, but Text is a lot easier
// and this is not a frequent case.
ml := reRange.FindStringSubmatch(p.scanner.Text())
if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] {
p.setError(errIncorrectLegacyRange, "")
return x, x
}
p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Text()[:len(ml[1])])
p.parsedRange = true
return p.rangeStart, p.rangeEnd
}
return x, x
}
// bools recognizes all valid UCD boolean values.
var bools = map[string]bool{
"": false,
"N": false,
"No": false,
"F": false,
"False": false,
"Y": true,
"Yes": true,
"T": true,
"True": true,
}
// Bool parses and returns field i as a boolean value.
func (p *Parser) Bool(i int) bool {
f := p.getField(i)
for s, v := range bools {
if f == s {
return v
}
}
p.setError(strconv.ErrSyntax, "error parsing bool")
return false
}
// Int parses and returns field i as an integer value.
func (p *Parser) Int(i int) int {
x, err := strconv.ParseInt(string(p.getField(i)), 10, 64)
p.setError(err, "error parsing int")
return int(x)
}
// Uint parses and returns field i as an unsigned integer value.
func (p *Parser) Uint(i int) uint {
x, err := strconv.ParseUint(string(p.getField(i)), 10, 64)
p.setError(err, "error parsing uint")
return uint(x)
}
// Float parses and returns field i as a decimal value.
func (p *Parser) Float(i int) float64 {
x, err := strconv.ParseFloat(string(p.getField(i)), 64)
p.setError(err, "error parsing float")
return x
}
// String parses and returns field i as a string value.
func (p *Parser) String(i int) string {
return string(p.getField(i))
}
// Strings parses and returns field i as a space-separated list of strings.
func (p *Parser) Strings(i int) []string {
ss := strings.Split(string(p.getField(i)), " ")
for i, s := range ss {
ss[i] = strings.TrimSpace(s)
}
return ss
}
// Comment returns the comments for the current line.
func (p *Parser) Comment() string {
return string(p.comment)
}
var errUndefinedEnum = errors.New("ucd: undefined enum value")
// Enum interprets and returns field i as a value that must be one of the values
// in enum.
func (p *Parser) Enum(i int, enum ...string) string {
f := p.getField(i)
for _, s := range enum {
if f == s {
return s
}
}
p.setError(errUndefinedEnum, "error parsing enum")
return ""
}

187
vendor/golang.org/x/text/language/coverage.go generated vendored Normal file
View file

@ -0,0 +1,187 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"fmt"
"sort"
"golang.org/x/text/internal/language"
)
// The Coverage interface is used to define the level of coverage of an
// internationalization service. Note that not all types are supported by all
// services. As lists may be generated on the fly, it is recommended that users
// of a Coverage cache the results.
type Coverage interface {
// Tags returns the list of supported tags.
Tags() []Tag
// BaseLanguages returns the list of supported base languages.
BaseLanguages() []Base
// Scripts returns the list of supported scripts.
Scripts() []Script
// Regions returns the list of supported regions.
Regions() []Region
}
var (
// Supported defines a Coverage that lists all supported subtags. Tags
// always returns nil.
Supported Coverage = allSubtags{}
)
// TODO:
// - Support Variants, numbering systems.
// - CLDR coverage levels.
// - Set of common tags defined in this package.
type allSubtags struct{}
// Regions returns the list of supported regions. As all regions are in a
// consecutive range, it simply returns a slice of numbers in increasing order.
// The "undefined" region is not returned.
func (s allSubtags) Regions() []Region {
reg := make([]Region, language.NumRegions)
for i := range reg {
reg[i] = Region{language.Region(i + 1)}
}
return reg
}
// Scripts returns the list of supported scripts. As all scripts are in a
// consecutive range, it simply returns a slice of numbers in increasing order.
// The "undefined" script is not returned.
func (s allSubtags) Scripts() []Script {
scr := make([]Script, language.NumScripts)
for i := range scr {
scr[i] = Script{language.Script(i + 1)}
}
return scr
}
// BaseLanguages returns the list of all supported base languages. It generates
// the list by traversing the internal structures.
func (s allSubtags) BaseLanguages() []Base {
bs := language.BaseLanguages()
base := make([]Base, len(bs))
for i, b := range bs {
base[i] = Base{b}
}
return base
}
// Tags always returns nil.
func (s allSubtags) Tags() []Tag {
return nil
}
// coverage is used by NewCoverage which is used as a convenient way for
// creating Coverage implementations for partially defined data. Very often a
// package will only need to define a subset of slices. coverage provides a
// convenient way to do this. Moreover, packages using NewCoverage, instead of
// their own implementation, will not break if later new slice types are added.
type coverage struct {
tags func() []Tag
bases func() []Base
scripts func() []Script
regions func() []Region
}
func (s *coverage) Tags() []Tag {
if s.tags == nil {
return nil
}
return s.tags()
}
// bases implements sort.Interface and is used to sort base languages.
type bases []Base
func (b bases) Len() int {
return len(b)
}
func (b bases) Swap(i, j int) {
b[i], b[j] = b[j], b[i]
}
func (b bases) Less(i, j int) bool {
return b[i].langID < b[j].langID
}
// BaseLanguages returns the result from calling s.bases if it is specified or
// otherwise derives the set of supported base languages from tags.
func (s *coverage) BaseLanguages() []Base {
if s.bases == nil {
tags := s.Tags()
if len(tags) == 0 {
return nil
}
a := make([]Base, len(tags))
for i, t := range tags {
a[i] = Base{language.Language(t.lang())}
}
sort.Sort(bases(a))
k := 0
for i := 1; i < len(a); i++ {
if a[k] != a[i] {
k++
a[k] = a[i]
}
}
return a[:k+1]
}
return s.bases()
}
func (s *coverage) Scripts() []Script {
if s.scripts == nil {
return nil
}
return s.scripts()
}
func (s *coverage) Regions() []Region {
if s.regions == nil {
return nil
}
return s.regions()
}
// NewCoverage returns a Coverage for the given lists. It is typically used by
// packages providing internationalization services to define their level of
// coverage. A list may be of type []T or func() []T, where T is either Tag,
// Base, Script or Region. The returned Coverage derives the value for Bases
// from Tags if no func or slice for []Base is specified. For other unspecified
// types the returned Coverage will return nil for the respective methods.
func NewCoverage(list ...interface{}) Coverage {
s := &coverage{}
for _, x := range list {
switch v := x.(type) {
case func() []Base:
s.bases = v
case func() []Script:
s.scripts = v
case func() []Region:
s.regions = v
case func() []Tag:
s.tags = v
case []Base:
s.bases = func() []Base { return v }
case []Script:
s.scripts = func() []Script { return v }
case []Region:
s.regions = func() []Region { return v }
case []Tag:
s.tags = func() []Tag { return v }
default:
panic(fmt.Sprintf("language: unsupported set type %T", v))
}
}
return s
}

102
vendor/golang.org/x/text/language/doc.go generated vendored Normal file
View file

@ -0,0 +1,102 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package language implements BCP 47 language tags and related functionality.
//
// The most important function of package language is to match a list of
// user-preferred languages to a list of supported languages.
// It alleviates the developer of dealing with the complexity of this process
// and provides the user with the best experience
// (see https://blog.golang.org/matchlang).
//
//
// Matching preferred against supported languages
//
// A Matcher for an application that supports English, Australian English,
// Danish, and standard Mandarin can be created as follows:
//
// var matcher = language.NewMatcher([]language.Tag{
// language.English, // The first language is used as fallback.
// language.MustParse("en-AU"),
// language.Danish,
// language.Chinese,
// })
//
// This list of supported languages is typically implied by the languages for
// which there exists translations of the user interface.
//
// User-preferred languages usually come as a comma-separated list of BCP 47
// language tags.
// The MatchString finds best matches for such strings:
//
// handler(w http.ResponseWriter, r *http.Request) {
// lang, _ := r.Cookie("lang")
// accept := r.Header.Get("Accept-Language")
// tag, _ := language.MatchStrings(matcher, lang.String(), accept)
//
// // tag should now be used for the initialization of any
// // locale-specific service.
// }
//
// The Matcher's Match method can be used to match Tags directly.
//
// Matchers are aware of the intricacies of equivalence between languages, such
// as deprecated subtags, legacy tags, macro languages, mutual
// intelligibility between scripts and languages, and transparently passing
// BCP 47 user configuration.
// For instance, it will know that a reader of Bokmål Danish can read Norwegian
// and will know that Cantonese ("yue") is a good match for "zh-HK".
//
//
// Using match results
//
// To guarantee a consistent user experience to the user it is important to
// use the same language tag for the selection of any locale-specific services.
// For example, it is utterly confusing to substitute spelled-out numbers
// or dates in one language in text of another language.
// More subtly confusing is using the wrong sorting order or casing
// algorithm for a certain language.
//
// All the packages in x/text that provide locale-specific services
// (e.g. collate, cases) should be initialized with the tag that was
// obtained at the start of an interaction with the user.
//
// Note that Tag that is returned by Match and MatchString may differ from any
// of the supported languages, as it may contain carried over settings from
// the user tags.
// This may be inconvenient when your application has some additional
// locale-specific data for your supported languages.
// Match and MatchString both return the index of the matched supported tag
// to simplify associating such data with the matched tag.
//
//
// Canonicalization
//
// If one uses the Matcher to compare languages one does not need to
// worry about canonicalization.
//
// The meaning of a Tag varies per application. The language package
// therefore delays canonicalization and preserves information as much
// as possible. The Matcher, however, will always take into account that
// two different tags may represent the same language.
//
// By default, only legacy and deprecated tags are converted into their
// canonical equivalent. All other information is preserved. This approach makes
// the confidence scores more accurate and allows matchers to distinguish
// between variants that are otherwise lost.
//
// As a consequence, two tags that should be treated as identical according to
// BCP 47 or CLDR, like "en-Latn" and "en", will be represented differently. The
// Matcher handles such distinctions, though, and is aware of the
// equivalence relations. The CanonType type can be used to alter the
// canonicalization form.
//
// References
//
// BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47
//
package language // import "golang.org/x/text/language"
// TODO: explanation on how to match languages for your own locale-specific
// service.

305
vendor/golang.org/x/text/language/gen.go generated vendored Normal file
View file

@ -0,0 +1,305 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Language tag table generator.
// Data read from the web.
package main
import (
"flag"
"fmt"
"io"
"log"
"sort"
"strconv"
"strings"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/language"
"golang.org/x/text/unicode/cldr"
)
var (
test = flag.Bool("test",
false,
"test existing tables; can be used to compare web data with package data.")
outputFile = flag.String("output",
"tables.go",
"output file for generated tables")
)
func main() {
gen.Init()
w := gen.NewCodeWriter()
defer w.WriteGoFile("tables.go", "language")
b := newBuilder(w)
gen.WriteCLDRVersion(w)
b.writeConstants()
b.writeMatchData()
}
type builder struct {
w *gen.CodeWriter
hw io.Writer // MultiWriter for w and w.Hash
data *cldr.CLDR
supp *cldr.SupplementalData
}
func (b *builder) langIndex(s string) uint16 {
return uint16(language.MustParseBase(s))
}
func (b *builder) regionIndex(s string) int {
return int(language.MustParseRegion(s))
}
func (b *builder) scriptIndex(s string) int {
return int(language.MustParseScript(s))
}
func newBuilder(w *gen.CodeWriter) *builder {
r := gen.OpenCLDRCoreZip()
defer r.Close()
d := &cldr.Decoder{}
data, err := d.DecodeZip(r)
if err != nil {
log.Fatal(err)
}
b := builder{
w: w,
hw: io.MultiWriter(w, w.Hash),
data: data,
supp: data.Supplemental(),
}
return &b
}
// writeConsts computes f(v) for all v in values and writes the results
// as constants named _v to a single constant block.
func (b *builder) writeConsts(f func(string) int, values ...string) {
fmt.Fprintln(b.w, "const (")
for _, v := range values {
fmt.Fprintf(b.w, "\t_%s = %v\n", v, f(v))
}
fmt.Fprintln(b.w, ")")
}
// TODO: region inclusion data will probably not be use used in future matchers.
var langConsts = []string{
"de", "en", "fr", "it", "mo", "no", "nb", "pt", "sh", "mul", "und",
}
var scriptConsts = []string{
"Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy",
"Zzzz",
}
var regionConsts = []string{
"001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US",
"ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo.
}
func (b *builder) writeConstants() {
b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
b.writeConsts(b.regionIndex, regionConsts...)
b.writeConsts(b.scriptIndex, scriptConsts...)
}
type mutualIntelligibility struct {
want, have uint16
distance uint8
oneway bool
}
type scriptIntelligibility struct {
wantLang, haveLang uint16
wantScript, haveScript uint8
distance uint8
// Always oneway
}
type regionIntelligibility struct {
lang uint16 // compact language id
script uint8 // 0 means any
group uint8 // 0 means any; if bit 7 is set it means inverse
distance uint8
// Always twoway.
}
// writeMatchData writes tables with languages and scripts for which there is
// mutual intelligibility. The data is based on CLDR's languageMatching data.
// Note that we use a different algorithm than the one defined by CLDR and that
// we slightly modify the data. For example, we convert scores to confidence levels.
// We also drop all region-related data as we use a different algorithm to
// determine region equivalence.
func (b *builder) writeMatchData() {
lm := b.supp.LanguageMatching.LanguageMatches
cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new")
regionHierarchy := map[string][]string{}
for _, g := range b.supp.TerritoryContainment.Group {
regions := strings.Split(g.Contains, " ")
regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...)
}
regionToGroups := make([]uint8, language.NumRegions)
idToIndex := map[string]uint8{}
for i, mv := range lm[0].MatchVariable {
if i > 6 {
log.Fatalf("Too many groups: %d", i)
}
idToIndex[mv.Id] = uint8(i + 1)
// TODO: also handle '-'
for _, r := range strings.Split(mv.Value, "+") {
todo := []string{r}
for k := 0; k < len(todo); k++ {
r := todo[k]
regionToGroups[b.regionIndex(r)] |= 1 << uint8(i)
todo = append(todo, regionHierarchy[r]...)
}
}
}
b.w.WriteVar("regionToGroups", regionToGroups)
// maps language id to in- and out-of-group region.
paradigmLocales := [][3]uint16{}
locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ")
for i := 0; i < len(locales); i += 2 {
x := [3]uint16{}
for j := 0; j < 2; j++ {
pc := strings.SplitN(locales[i+j], "-", 2)
x[0] = b.langIndex(pc[0])
if len(pc) == 2 {
x[1+j] = uint16(b.regionIndex(pc[1]))
}
}
paradigmLocales = append(paradigmLocales, x)
}
b.w.WriteVar("paradigmLocales", paradigmLocales)
b.w.WriteType(mutualIntelligibility{})
b.w.WriteType(scriptIntelligibility{})
b.w.WriteType(regionIntelligibility{})
matchLang := []mutualIntelligibility{}
matchScript := []scriptIntelligibility{}
matchRegion := []regionIntelligibility{}
// Convert the languageMatch entries in lists keyed by desired language.
for _, m := range lm[0].LanguageMatch {
// Different versions of CLDR use different separators.
desired := strings.Replace(m.Desired, "-", "_", -1)
supported := strings.Replace(m.Supported, "-", "_", -1)
d := strings.Split(desired, "_")
s := strings.Split(supported, "_")
if len(d) != len(s) {
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
continue
}
distance, _ := strconv.ParseInt(m.Distance, 10, 8)
switch len(d) {
case 2:
if desired == supported && desired == "*_*" {
continue
}
// language-script pair.
matchScript = append(matchScript, scriptIntelligibility{
wantLang: uint16(b.langIndex(d[0])),
haveLang: uint16(b.langIndex(s[0])),
wantScript: uint8(b.scriptIndex(d[1])),
haveScript: uint8(b.scriptIndex(s[1])),
distance: uint8(distance),
})
if m.Oneway != "true" {
matchScript = append(matchScript, scriptIntelligibility{
wantLang: uint16(b.langIndex(s[0])),
haveLang: uint16(b.langIndex(d[0])),
wantScript: uint8(b.scriptIndex(s[1])),
haveScript: uint8(b.scriptIndex(d[1])),
distance: uint8(distance),
})
}
case 1:
if desired == supported && desired == "*" {
continue
}
if distance == 1 {
// nb == no is already handled by macro mapping. Check there
// really is only this case.
if d[0] != "no" || s[0] != "nb" {
log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
}
continue
}
// TODO: consider dropping oneway field and just doubling the entry.
matchLang = append(matchLang, mutualIntelligibility{
want: uint16(b.langIndex(d[0])),
have: uint16(b.langIndex(s[0])),
distance: uint8(distance),
oneway: m.Oneway == "true",
})
case 3:
if desired == supported && desired == "*_*_*" {
continue
}
if desired != supported {
// This is now supported by CLDR, but only one case, which
// should already be covered by paradigm locales. For instance,
// test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in
// testdata/CLDRLocaleMatcherTest.txt tests this.
if supported != "en_*_GB" {
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
}
continue
}
ri := regionIntelligibility{
lang: b.langIndex(d[0]),
distance: uint8(distance),
}
if d[1] != "*" {
ri.script = uint8(b.scriptIndex(d[1]))
}
switch {
case d[2] == "*":
ri.group = 0x80 // not contained in anything
case strings.HasPrefix(d[2], "$!"):
ri.group = 0x80
d[2] = "$" + d[2][len("$!"):]
fallthrough
case strings.HasPrefix(d[2], "$"):
ri.group |= idToIndex[d[2]]
}
matchRegion = append(matchRegion, ri)
default:
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
}
}
sort.SliceStable(matchLang, func(i, j int) bool {
return matchLang[i].distance < matchLang[j].distance
})
b.w.WriteComment(`
matchLang holds pairs of langIDs of base languages that are typically
mutually intelligible. Each pair is associated with a confidence and
whether the intelligibility goes one or both ways.`)
b.w.WriteVar("matchLang", matchLang)
b.w.WriteComment(`
matchScript holds pairs of scriptIDs where readers of one script
can typically also read the other. Each is associated with a confidence.`)
sort.SliceStable(matchScript, func(i, j int) bool {
return matchScript[i].distance < matchScript[j].distance
})
b.w.WriteVar("matchScript", matchScript)
sort.SliceStable(matchRegion, func(i, j int) bool {
return matchRegion[i].distance < matchRegion[j].distance
})
b.w.WriteVar("matchRegion", matchRegion)
}

38
vendor/golang.org/x/text/language/go1_1.go generated vendored Normal file
View file

@ -0,0 +1,38 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !go1.2
package language
import "sort"
func sortStable(s sort.Interface) {
ss := stableSort{
s: s,
pos: make([]int, s.Len()),
}
for i := range ss.pos {
ss.pos[i] = i
}
sort.Sort(&ss)
}
type stableSort struct {
s sort.Interface
pos []int
}
func (s *stableSort) Len() int {
return len(s.pos)
}
func (s *stableSort) Less(i, j int) bool {
return s.s.Less(i, j) || !s.s.Less(j, i) && s.pos[i] < s.pos[j]
}
func (s *stableSort) Swap(i, j int) {
s.s.Swap(i, j)
s.pos[i], s.pos[j] = s.pos[j], s.pos[i]
}

11
vendor/golang.org/x/text/language/go1_2.go generated vendored Normal file
View file

@ -0,0 +1,11 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build go1.2
package language
import "sort"
var sortStable = sort.Stable

601
vendor/golang.org/x/text/language/language.go generated vendored Normal file
View file

@ -0,0 +1,601 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go -output tables.go
package language
// TODO: Remove above NOTE after:
// - verifying that tables are dropped correctly (most notably matcher tables).
import (
"strings"
"golang.org/x/text/internal/language"
"golang.org/x/text/internal/language/compact"
)
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
type Tag compact.Tag
func makeTag(t language.Tag) (tag Tag) {
return Tag(compact.Make(t))
}
func (t *Tag) tag() language.Tag {
return (*compact.Tag)(t).Tag()
}
func (t *Tag) isCompact() bool {
return (*compact.Tag)(t).IsCompact()
}
// TODO: improve performance.
func (t *Tag) lang() language.Language { return t.tag().LangID }
func (t *Tag) region() language.Region { return t.tag().RegionID }
func (t *Tag) script() language.Script { return t.tag().ScriptID }
// Make is a convenience wrapper for Parse that omits the error.
// In case of an error, a sensible default is returned.
func Make(s string) Tag {
return Default.Make(s)
}
// Make is a convenience wrapper for c.Parse that omits the error.
// In case of an error, a sensible default is returned.
func (c CanonType) Make(s string) Tag {
t, _ := c.Parse(s)
return t
}
// Raw returns the raw base language, script and region, without making an
// attempt to infer their values.
func (t Tag) Raw() (b Base, s Script, r Region) {
tt := t.tag()
return Base{tt.LangID}, Script{tt.ScriptID}, Region{tt.RegionID}
}
// IsRoot returns true if t is equal to language "und".
func (t Tag) IsRoot() bool {
return compact.Tag(t).IsRoot()
}
// CanonType can be used to enable or disable various types of canonicalization.
type CanonType int
const (
// Replace deprecated base languages with their preferred replacements.
DeprecatedBase CanonType = 1 << iota
// Replace deprecated scripts with their preferred replacements.
DeprecatedScript
// Replace deprecated regions with their preferred replacements.
DeprecatedRegion
// Remove redundant scripts.
SuppressScript
// Normalize legacy encodings. This includes legacy languages defined in
// CLDR as well as bibliographic codes defined in ISO-639.
Legacy
// Map the dominant language of a macro language group to the macro language
// subtag. For example cmn -> zh.
Macro
// The CLDR flag should be used if full compatibility with CLDR is required.
// There are a few cases where language.Tag may differ from CLDR. To follow all
// of CLDR's suggestions, use All|CLDR.
CLDR
// Raw can be used to Compose or Parse without Canonicalization.
Raw CanonType = 0
// Replace all deprecated tags with their preferred replacements.
Deprecated = DeprecatedBase | DeprecatedScript | DeprecatedRegion
// All canonicalizations recommended by BCP 47.
BCP47 = Deprecated | SuppressScript
// All canonicalizations.
All = BCP47 | Legacy | Macro
// Default is the canonicalization used by Parse, Make and Compose. To
// preserve as much information as possible, canonicalizations that remove
// potentially valuable information are not included. The Matcher is
// designed to recognize similar tags that would be the same if
// they were canonicalized using All.
Default = Deprecated | Legacy
canonLang = DeprecatedBase | Legacy | Macro
// TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
)
// canonicalize returns the canonicalized equivalent of the tag and
// whether there was any change.
func canonicalize(c CanonType, t language.Tag) (language.Tag, bool) {
if c == Raw {
return t, false
}
changed := false
if c&SuppressScript != 0 {
if t.LangID.SuppressScript() == t.ScriptID {
t.ScriptID = 0
changed = true
}
}
if c&canonLang != 0 {
for {
if l, aliasType := t.LangID.Canonicalize(); l != t.LangID {
switch aliasType {
case language.Legacy:
if c&Legacy != 0 {
if t.LangID == _sh && t.ScriptID == 0 {
t.ScriptID = _Latn
}
t.LangID = l
changed = true
}
case language.Macro:
if c&Macro != 0 {
// We deviate here from CLDR. The mapping "nb" -> "no"
// qualifies as a typical Macro language mapping. However,
// for legacy reasons, CLDR maps "no", the macro language
// code for Norwegian, to the dominant variant "nb". This
// change is currently under consideration for CLDR as well.
// See https://unicode.org/cldr/trac/ticket/2698 and also
// https://unicode.org/cldr/trac/ticket/1790 for some of the
// practical implications. TODO: this check could be removed
// if CLDR adopts this change.
if c&CLDR == 0 || t.LangID != _nb {
changed = true
t.LangID = l
}
}
case language.Deprecated:
if c&DeprecatedBase != 0 {
if t.LangID == _mo && t.RegionID == 0 {
t.RegionID = _MD
}
t.LangID = l
changed = true
// Other canonicalization types may still apply.
continue
}
}
} else if c&Legacy != 0 && t.LangID == _no && c&CLDR != 0 {
t.LangID = _nb
changed = true
}
break
}
}
if c&DeprecatedScript != 0 {
if t.ScriptID == _Qaai {
changed = true
t.ScriptID = _Zinh
}
}
if c&DeprecatedRegion != 0 {
if r := t.RegionID.Canonicalize(); r != t.RegionID {
changed = true
t.RegionID = r
}
}
return t, changed
}
// Canonicalize returns the canonicalized equivalent of the tag.
func (c CanonType) Canonicalize(t Tag) (Tag, error) {
// First try fast path.
if t.isCompact() {
if _, changed := canonicalize(c, compact.Tag(t).Tag()); !changed {
return t, nil
}
}
// It is unlikely that one will canonicalize a tag after matching. So do
// a slow but simple approach here.
if tag, changed := canonicalize(c, t.tag()); changed {
tag.RemakeString()
return makeTag(tag), nil
}
return t, nil
}
// Confidence indicates the level of certainty for a given return value.
// For example, Serbian may be written in Cyrillic or Latin script.
// The confidence level indicates whether a value was explicitly specified,
// whether it is typically the only possible value, or whether there is
// an ambiguity.
type Confidence int
const (
No Confidence = iota // full confidence that there was no match
Low // most likely value picked out of a set of alternatives
High // value is generally assumed to be the correct match
Exact // exact match or explicitly specified value
)
var confName = []string{"No", "Low", "High", "Exact"}
func (c Confidence) String() string {
return confName[c]
}
// String returns the canonical string representation of the language tag.
func (t Tag) String() string {
return t.tag().String()
}
// MarshalText implements encoding.TextMarshaler.
func (t Tag) MarshalText() (text []byte, err error) {
return t.tag().MarshalText()
}
// UnmarshalText implements encoding.TextUnmarshaler.
func (t *Tag) UnmarshalText(text []byte) error {
var tag language.Tag
err := tag.UnmarshalText(text)
*t = makeTag(tag)
return err
}
// Base returns the base language of the language tag. If the base language is
// unspecified, an attempt will be made to infer it from the context.
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
func (t Tag) Base() (Base, Confidence) {
if b := t.lang(); b != 0 {
return Base{b}, Exact
}
tt := t.tag()
c := High
if tt.ScriptID == 0 && !tt.RegionID.IsCountry() {
c = Low
}
if tag, err := tt.Maximize(); err == nil && tag.LangID != 0 {
return Base{tag.LangID}, c
}
return Base{0}, No
}
// Script infers the script for the language tag. If it was not explicitly given, it will infer
// a most likely candidate.
// If more than one script is commonly used for a language, the most likely one
// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
// for Serbian.
// If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
// as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
// common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
// See https://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
// unknown value in CLDR. (Zzzz, Exact) is returned if Zzzz was explicitly specified.
// Note that an inferred script is never guaranteed to be the correct one. Latin is
// almost exclusively used for Afrikaans, but Arabic has been used for some texts
// in the past. Also, the script that is commonly used may change over time.
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
func (t Tag) Script() (Script, Confidence) {
if scr := t.script(); scr != 0 {
return Script{scr}, Exact
}
tt := t.tag()
sc, c := language.Script(_Zzzz), No
if scr := tt.LangID.SuppressScript(); scr != 0 {
// Note: it is not always the case that a language with a suppress
// script value is only written in one script (e.g. kk, ms, pa).
if tt.RegionID == 0 {
return Script{scr}, High
}
sc, c = scr, High
}
if tag, err := tt.Maximize(); err == nil {
if tag.ScriptID != sc {
sc, c = tag.ScriptID, Low
}
} else {
tt, _ = canonicalize(Deprecated|Macro, tt)
if tag, err := tt.Maximize(); err == nil && tag.ScriptID != sc {
sc, c = tag.ScriptID, Low
}
}
return Script{sc}, c
}
// Region returns the region for the language tag. If it was not explicitly given, it will
// infer a most likely candidate from the context.
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
func (t Tag) Region() (Region, Confidence) {
if r := t.region(); r != 0 {
return Region{r}, Exact
}
tt := t.tag()
if tt, err := tt.Maximize(); err == nil {
return Region{tt.RegionID}, Low // TODO: differentiate between high and low.
}
tt, _ = canonicalize(Deprecated|Macro, tt)
if tag, err := tt.Maximize(); err == nil {
return Region{tag.RegionID}, Low
}
return Region{_ZZ}, No // TODO: return world instead of undetermined?
}
// Variants returns the variants specified explicitly for this language tag.
// or nil if no variant was specified.
func (t Tag) Variants() []Variant {
if !compact.Tag(t).MayHaveVariants() {
return nil
}
v := []Variant{}
x, str := "", t.tag().Variants()
for str != "" {
x, str = nextToken(str)
v = append(v, Variant{x})
}
return v
}
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
//
// Parent returns a tag for a less specific language that is mutually
// intelligible or Und if there is no such language. This may not be the same as
// simply stripping the last BCP 47 subtag. For instance, the parent of "zh-TW"
// is "zh-Hant", and the parent of "zh-Hant" is "und".
func (t Tag) Parent() Tag {
return Tag(compact.Tag(t).Parent())
}
// returns token t and the rest of the string.
func nextToken(s string) (t, tail string) {
p := strings.Index(s[1:], "-")
if p == -1 {
return s[1:], ""
}
p++
return s[1:p], s[p:]
}
// Extension is a single BCP 47 extension.
type Extension struct {
s string
}
// String returns the string representation of the extension, including the
// type tag.
func (e Extension) String() string {
return e.s
}
// ParseExtension parses s as an extension and returns it on success.
func ParseExtension(s string) (e Extension, err error) {
ext, err := language.ParseExtension(s)
return Extension{ext}, err
}
// Type returns the one-byte extension type of e. It returns 0 for the zero
// exception.
func (e Extension) Type() byte {
if e.s == "" {
return 0
}
return e.s[0]
}
// Tokens returns the list of tokens of e.
func (e Extension) Tokens() []string {
return strings.Split(e.s, "-")
}
// Extension returns the extension of type x for tag t. It will return
// false for ok if t does not have the requested extension. The returned
// extension will be invalid in this case.
func (t Tag) Extension(x byte) (ext Extension, ok bool) {
if !compact.Tag(t).MayHaveExtensions() {
return Extension{}, false
}
e, ok := t.tag().Extension(x)
return Extension{e}, ok
}
// Extensions returns all extensions of t.
func (t Tag) Extensions() []Extension {
if !compact.Tag(t).MayHaveExtensions() {
return nil
}
e := []Extension{}
for _, ext := range t.tag().Extensions() {
e = append(e, Extension{ext})
}
return e
}
// TypeForKey returns the type associated with the given key, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// TypeForKey will traverse the inheritance chain to get the correct value.
func (t Tag) TypeForKey(key string) string {
if !compact.Tag(t).MayHaveExtensions() {
if key != "rg" && key != "va" {
return ""
}
}
return t.tag().TypeForKey(key)
}
// SetTypeForKey returns a new Tag with the key set to type, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// An empty value removes an existing pair with the same key.
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
tt, err := t.tag().SetTypeForKey(key, value)
return makeTag(tt), err
}
// NumCompactTags is the number of compact tags. The maximum tag is
// NumCompactTags-1.
const NumCompactTags = compact.NumCompactTags
// CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
// for which data exists in the text repository.The index will change over time
// and should not be stored in persistent storage. If t does not match a compact
// index, exact will be false and the compact index will be returned for the
// first match after repeatedly taking the Parent of t.
func CompactIndex(t Tag) (index int, exact bool) {
id, exact := compact.LanguageID(compact.Tag(t))
return int(id), exact
}
var root = language.Tag{}
// Base is an ISO 639 language code, used for encoding the base language
// of a language tag.
type Base struct {
langID language.Language
}
// ParseBase parses a 2- or 3-letter ISO 639 code.
// It returns a ValueError if s is a well-formed but unknown language identifier
// or another error if another error occurred.
func ParseBase(s string) (Base, error) {
l, err := language.ParseBase(s)
return Base{l}, err
}
// String returns the BCP 47 representation of the base language.
func (b Base) String() string {
return b.langID.String()
}
// ISO3 returns the ISO 639-3 language code.
func (b Base) ISO3() string {
return b.langID.ISO3()
}
// IsPrivateUse reports whether this language code is reserved for private use.
func (b Base) IsPrivateUse() bool {
return b.langID.IsPrivateUse()
}
// Script is a 4-letter ISO 15924 code for representing scripts.
// It is idiomatically represented in title case.
type Script struct {
scriptID language.Script
}
// ParseScript parses a 4-letter ISO 15924 code.
// It returns a ValueError if s is a well-formed but unknown script identifier
// or another error if another error occurred.
func ParseScript(s string) (Script, error) {
sc, err := language.ParseScript(s)
return Script{sc}, err
}
// String returns the script code in title case.
// It returns "Zzzz" for an unspecified script.
func (s Script) String() string {
return s.scriptID.String()
}
// IsPrivateUse reports whether this script code is reserved for private use.
func (s Script) IsPrivateUse() bool {
return s.scriptID.IsPrivateUse()
}
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
type Region struct {
regionID language.Region
}
// EncodeM49 returns the Region for the given UN M.49 code.
// It returns an error if r is not a valid code.
func EncodeM49(r int) (Region, error) {
rid, err := language.EncodeM49(r)
return Region{rid}, err
}
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
// It returns a ValueError if s is a well-formed but unknown region identifier
// or another error if another error occurred.
func ParseRegion(s string) (Region, error) {
r, err := language.ParseRegion(s)
return Region{r}, err
}
// String returns the BCP 47 representation for the region.
// It returns "ZZ" for an unspecified region.
func (r Region) String() string {
return r.regionID.String()
}
// ISO3 returns the 3-letter ISO code of r.
// Note that not all regions have a 3-letter ISO code.
// In such cases this method returns "ZZZ".
func (r Region) ISO3() string {
return r.regionID.ISO3()
}
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
// is not defined for r.
func (r Region) M49() int {
return r.regionID.M49()
}
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
// may include private-use tags that are assigned by CLDR and used in this
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
func (r Region) IsPrivateUse() bool {
return r.regionID.IsPrivateUse()
}
// IsCountry returns whether this region is a country or autonomous area. This
// includes non-standard definitions from CLDR.
func (r Region) IsCountry() bool {
return r.regionID.IsCountry()
}
// IsGroup returns whether this region defines a collection of regions. This
// includes non-standard definitions from CLDR.
func (r Region) IsGroup() bool {
return r.regionID.IsGroup()
}
// Contains returns whether Region c is contained by Region r. It returns true
// if c == r.
func (r Region) Contains(c Region) bool {
return r.regionID.Contains(c.regionID)
}
// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
// In all other cases it returns either the region itself or an error.
//
// This method may return an error for a region for which there exists a
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
// region will already be canonicalized it was obtained from a Tag that was
// obtained using any of the default methods.
func (r Region) TLD() (Region, error) {
tld, err := r.regionID.TLD()
return Region{tld}, err
}
// Canonicalize returns the region or a possible replacement if the region is
// deprecated. It will not return a replacement for deprecated regions that
// are split into multiple regions.
func (r Region) Canonicalize() Region {
return Region{r.regionID.Canonicalize()}
}
// Variant represents a registered variant of a language as defined by BCP 47.
type Variant struct {
variant string
}
// ParseVariant parses and returns a Variant. An error is returned if s is not
// a valid variant.
func ParseVariant(s string) (Variant, error) {
v, err := language.ParseVariant(s)
return Variant{v.String()}, err
}
// String returns the string representation of the variant.
func (v Variant) String() string {
return v.variant
}

735
vendor/golang.org/x/text/language/match.go generated vendored Normal file
View file

@ -0,0 +1,735 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"errors"
"strings"
"golang.org/x/text/internal/language"
)
// A MatchOption configures a Matcher.
type MatchOption func(*matcher)
// PreferSameScript will, in the absence of a match, result in the first
// preferred tag with the same script as a supported tag to match this supported
// tag. The default is currently true, but this may change in the future.
func PreferSameScript(preferSame bool) MatchOption {
return func(m *matcher) { m.preferSameScript = preferSame }
}
// TODO(v1.0.0): consider making Matcher a concrete type, instead of interface.
// There doesn't seem to be too much need for multiple types.
// Making it a concrete type allows MatchStrings to be a method, which will
// improve its discoverability.
// MatchStrings parses and matches the given strings until one of them matches
// the language in the Matcher. A string may be an Accept-Language header as
// handled by ParseAcceptLanguage. The default language is returned if no
// other language matched.
func MatchStrings(m Matcher, lang ...string) (tag Tag, index int) {
for _, accept := range lang {
desired, _, err := ParseAcceptLanguage(accept)
if err != nil {
continue
}
if tag, index, conf := m.Match(desired...); conf != No {
return tag, index
}
}
tag, index, _ = m.Match()
return
}
// Matcher is the interface that wraps the Match method.
//
// Match returns the best match for any of the given tags, along with
// a unique index associated with the returned tag and a confidence
// score.
type Matcher interface {
Match(t ...Tag) (tag Tag, index int, c Confidence)
}
// Comprehends reports the confidence score for a speaker of a given language
// to being able to comprehend the written form of an alternative language.
func Comprehends(speaker, alternative Tag) Confidence {
_, _, c := NewMatcher([]Tag{alternative}).Match(speaker)
return c
}
// NewMatcher returns a Matcher that matches an ordered list of preferred tags
// against a list of supported tags based on written intelligibility, closeness
// of dialect, equivalence of subtags and various other rules. It is initialized
// with the list of supported tags. The first element is used as the default
// value in case no match is found.
//
// Its Match method matches the first of the given Tags to reach a certain
// confidence threshold. The tags passed to Match should therefore be specified
// in order of preference. Extensions are ignored for matching.
//
// The index returned by the Match method corresponds to the index of the
// matched tag in t, but is augmented with the Unicode extension ('u')of the
// corresponding preferred tag. This allows user locale options to be passed
// transparently.
func NewMatcher(t []Tag, options ...MatchOption) Matcher {
return newMatcher(t, options)
}
func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
var tt language.Tag
match, w, c := m.getBest(want...)
if match != nil {
tt, index = match.tag, match.index
} else {
// TODO: this should be an option
tt = m.default_.tag
if m.preferSameScript {
outer:
for _, w := range want {
script, _ := w.Script()
if script.scriptID == 0 {
// Don't do anything if there is no script, such as with
// private subtags.
continue
}
for i, h := range m.supported {
if script.scriptID == h.maxScript {
tt, index = h.tag, i
break outer
}
}
}
}
// TODO: select first language tag based on script.
}
if w.RegionID != tt.RegionID && w.RegionID != 0 {
if w.RegionID != 0 && tt.RegionID != 0 && tt.RegionID.Contains(w.RegionID) {
tt.RegionID = w.RegionID
tt.RemakeString()
} else if r := w.RegionID.String(); len(r) == 2 {
// TODO: also filter macro and deprecated.
tt, _ = tt.SetTypeForKey("rg", strings.ToLower(r)+"zzzz")
}
}
// Copy options from the user-provided tag into the result tag. This is hard
// to do after the fact, so we do it here.
// TODO: add in alternative variants to -u-va-.
// TODO: add preferred region to -u-rg-.
if e := w.Extensions(); len(e) > 0 {
b := language.Builder{}
b.SetTag(tt)
for _, e := range e {
b.AddExt(e)
}
tt = b.Make()
}
return makeTag(tt), index, c
}
// ErrMissingLikelyTagsData indicates no information was available
// to compute likely values of missing tags.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
// func (t *Tag) setTagsFrom(id Tag) {
// t.LangID = id.LangID
// t.ScriptID = id.ScriptID
// t.RegionID = id.RegionID
// }
// Tag Matching
// CLDR defines an algorithm for finding the best match between two sets of language
// tags. The basic algorithm defines how to score a possible match and then find
// the match with the best score
// (see https://www.unicode.org/reports/tr35/#LanguageMatching).
// Using scoring has several disadvantages. The scoring obfuscates the importance of
// the various factors considered, making the algorithm harder to understand. Using
// scoring also requires the full score to be computed for each pair of tags.
//
// We will use a different algorithm which aims to have the following properties:
// - clarity on the precedence of the various selection factors, and
// - improved performance by allowing early termination of a comparison.
//
// Matching algorithm (overview)
// Input:
// - supported: a set of supported tags
// - default: the default tag to return in case there is no match
// - desired: list of desired tags, ordered by preference, starting with
// the most-preferred.
//
// Algorithm:
// 1) Set the best match to the lowest confidence level
// 2) For each tag in "desired":
// a) For each tag in "supported":
// 1) compute the match between the two tags.
// 2) if the match is better than the previous best match, replace it
// with the new match. (see next section)
// b) if the current best match is Exact and pin is true the result will be
// frozen to the language found thusfar, although better matches may
// still be found for the same language.
// 3) If the best match so far is below a certain threshold, return "default".
//
// Ranking:
// We use two phases to determine whether one pair of tags are a better match
// than another pair of tags. First, we determine a rough confidence level. If the
// levels are different, the one with the highest confidence wins.
// Second, if the rough confidence levels are identical, we use a set of tie-breaker
// rules.
//
// The confidence level of matching a pair of tags is determined by finding the
// lowest confidence level of any matches of the corresponding subtags (the
// result is deemed as good as its weakest link).
// We define the following levels:
// Exact - An exact match of a subtag, before adding likely subtags.
// MaxExact - An exact match of a subtag, after adding likely subtags.
// [See Note 2].
// High - High level of mutual intelligibility between different subtag
// variants.
// Low - Low level of mutual intelligibility between different subtag
// variants.
// No - No mutual intelligibility.
//
// The following levels can occur for each type of subtag:
// Base: Exact, MaxExact, High, Low, No
// Script: Exact, MaxExact [see Note 3], Low, No
// Region: Exact, MaxExact, High
// Variant: Exact, High
// Private: Exact, No
//
// Any result with a confidence level of Low or higher is deemed a possible match.
// Once a desired tag matches any of the supported tags with a level of MaxExact
// or higher, the next desired tag is not considered (see Step 2.b).
// Note that CLDR provides languageMatching data that defines close equivalence
// classes for base languages, scripts and regions.
//
// Tie-breaking
// If we get the same confidence level for two matches, we apply a sequence of
// tie-breaking rules. The first that succeeds defines the result. The rules are
// applied in the following order.
// 1) Original language was defined and was identical.
// 2) Original region was defined and was identical.
// 3) Distance between two maximized regions was the smallest.
// 4) Original script was defined and was identical.
// 5) Distance from want tag to have tag using the parent relation [see Note 5.]
// If there is still no winner after these rules are applied, the first match
// found wins.
//
// Notes:
// [2] In practice, as matching of Exact is done in a separate phase from
// matching the other levels, we reuse the Exact level to mean MaxExact in
// the second phase. As a consequence, we only need the levels defined by
// the Confidence type. The MaxExact confidence level is mapped to High in
// the public API.
// [3] We do not differentiate between maximized script values that were derived
// from suppressScript versus most likely tag data. We determined that in
// ranking the two, one ranks just after the other. Moreover, the two cannot
// occur concurrently. As a consequence, they are identical for practical
// purposes.
// [4] In case of deprecated, macro-equivalents and legacy mappings, we assign
// the MaxExact level to allow iw vs he to still be a closer match than
// en-AU vs en-US, for example.
// [5] In CLDR a locale inherits fields that are unspecified for this locale
// from its parent. Therefore, if a locale is a parent of another locale,
// it is a strong measure for closeness, especially when no other tie
// breaker rule applies. One could also argue it is inconsistent, for
// example, when pt-AO matches pt (which CLDR equates with pt-BR), even
// though its parent is pt-PT according to the inheritance rules.
//
// Implementation Details:
// There are several performance considerations worth pointing out. Most notably,
// we preprocess as much as possible (within reason) at the time of creation of a
// matcher. This includes:
// - creating a per-language map, which includes data for the raw base language
// and its canonicalized variant (if applicable),
// - expanding entries for the equivalence classes defined in CLDR's
// languageMatch data.
// The per-language map ensures that typically only a very small number of tags
// need to be considered. The pre-expansion of canonicalized subtags and
// equivalence classes reduces the amount of map lookups that need to be done at
// runtime.
// matcher keeps a set of supported language tags, indexed by language.
type matcher struct {
default_ *haveTag
supported []*haveTag
index map[language.Language]*matchHeader
passSettings bool
preferSameScript bool
}
// matchHeader has the lists of tags for exact matches and matches based on
// maximized and canonicalized tags for a given language.
type matchHeader struct {
haveTags []*haveTag
original bool
}
// haveTag holds a supported Tag and its maximized script and region. The maximized
// or canonicalized language is not stored as it is not needed during matching.
type haveTag struct {
tag language.Tag
// index of this tag in the original list of supported tags.
index int
// conf is the maximum confidence that can result from matching this haveTag.
// When conf < Exact this means it was inserted after applying a CLDR equivalence rule.
conf Confidence
// Maximized region and script.
maxRegion language.Region
maxScript language.Script
// altScript may be checked as an alternative match to maxScript. If altScript
// matches, the confidence level for this match is Low. Theoretically there
// could be multiple alternative scripts. This does not occur in practice.
altScript language.Script
// nextMax is the index of the next haveTag with the same maximized tags.
nextMax uint16
}
func makeHaveTag(tag language.Tag, index int) (haveTag, language.Language) {
max := tag
if tag.LangID != 0 || tag.RegionID != 0 || tag.ScriptID != 0 {
max, _ = canonicalize(All, max)
max, _ = max.Maximize()
max.RemakeString()
}
return haveTag{tag, index, Exact, max.RegionID, max.ScriptID, altScript(max.LangID, max.ScriptID), 0}, max.LangID
}
// altScript returns an alternative script that may match the given script with
// a low confidence. At the moment, the langMatch data allows for at most one
// script to map to another and we rely on this to keep the code simple.
func altScript(l language.Language, s language.Script) language.Script {
for _, alt := range matchScript {
// TODO: also match cases where language is not the same.
if (language.Language(alt.wantLang) == l || language.Language(alt.haveLang) == l) &&
language.Script(alt.haveScript) == s {
return language.Script(alt.wantScript)
}
}
return 0
}
// addIfNew adds a haveTag to the list of tags only if it is a unique tag.
// Tags that have the same maximized values are linked by index.
func (h *matchHeader) addIfNew(n haveTag, exact bool) {
h.original = h.original || exact
// Don't add new exact matches.
for _, v := range h.haveTags {
if equalsRest(v.tag, n.tag) {
return
}
}
// Allow duplicate maximized tags, but create a linked list to allow quickly
// comparing the equivalents and bail out.
for i, v := range h.haveTags {
if v.maxScript == n.maxScript &&
v.maxRegion == n.maxRegion &&
v.tag.VariantOrPrivateUseTags() == n.tag.VariantOrPrivateUseTags() {
for h.haveTags[i].nextMax != 0 {
i = int(h.haveTags[i].nextMax)
}
h.haveTags[i].nextMax = uint16(len(h.haveTags))
break
}
}
h.haveTags = append(h.haveTags, &n)
}
// header returns the matchHeader for the given language. It creates one if
// it doesn't already exist.
func (m *matcher) header(l language.Language) *matchHeader {
if h := m.index[l]; h != nil {
return h
}
h := &matchHeader{}
m.index[l] = h
return h
}
func toConf(d uint8) Confidence {
if d <= 10 {
return High
}
if d < 30 {
return Low
}
return No
}
// newMatcher builds an index for the given supported tags and returns it as
// a matcher. It also expands the index by considering various equivalence classes
// for a given tag.
func newMatcher(supported []Tag, options []MatchOption) *matcher {
m := &matcher{
index: make(map[language.Language]*matchHeader),
preferSameScript: true,
}
for _, o := range options {
o(m)
}
if len(supported) == 0 {
m.default_ = &haveTag{}
return m
}
// Add supported languages to the index. Add exact matches first to give
// them precedence.
for i, tag := range supported {
tt := tag.tag()
pair, _ := makeHaveTag(tt, i)
m.header(tt.LangID).addIfNew(pair, true)
m.supported = append(m.supported, &pair)
}
m.default_ = m.header(supported[0].lang()).haveTags[0]
// Keep these in two different loops to support the case that two equivalent
// languages are distinguished, such as iw and he.
for i, tag := range supported {
tt := tag.tag()
pair, max := makeHaveTag(tt, i)
if max != tt.LangID {
m.header(max).addIfNew(pair, true)
}
}
// update is used to add indexes in the map for equivalent languages.
// update will only add entries to original indexes, thus not computing any
// transitive relations.
update := func(want, have uint16, conf Confidence) {
if hh := m.index[language.Language(have)]; hh != nil {
if !hh.original {
return
}
hw := m.header(language.Language(want))
for _, ht := range hh.haveTags {
v := *ht
if conf < v.conf {
v.conf = conf
}
v.nextMax = 0 // this value needs to be recomputed
if v.altScript != 0 {
v.altScript = altScript(language.Language(want), v.maxScript)
}
hw.addIfNew(v, conf == Exact && hh.original)
}
}
}
// Add entries for languages with mutual intelligibility as defined by CLDR's
// languageMatch data.
for _, ml := range matchLang {
update(ml.want, ml.have, toConf(ml.distance))
if !ml.oneway {
update(ml.have, ml.want, toConf(ml.distance))
}
}
// Add entries for possible canonicalizations. This is an optimization to
// ensure that only one map lookup needs to be done at runtime per desired tag.
// First we match deprecated equivalents. If they are perfect equivalents
// (their canonicalization simply substitutes a different language code, but
// nothing else), the match confidence is Exact, otherwise it is High.
for i, lm := range language.AliasMap {
// If deprecated codes match and there is no fiddling with the script or
// or region, we consider it an exact match.
conf := Exact
if language.AliasTypes[i] != language.Macro {
if !isExactEquivalent(language.Language(lm.From)) {
conf = High
}
update(lm.To, lm.From, conf)
}
update(lm.From, lm.To, conf)
}
return m
}
// getBest gets the best matching tag in m for any of the given tags, taking into
// account the order of preference of the given tags.
func (m *matcher) getBest(want ...Tag) (got *haveTag, orig language.Tag, c Confidence) {
best := bestMatch{}
for i, ww := range want {
w := ww.tag()
var max language.Tag
// Check for exact match first.
h := m.index[w.LangID]
if w.LangID != 0 {
if h == nil {
continue
}
// Base language is defined.
max, _ = canonicalize(Legacy|Deprecated|Macro, w)
// A region that is added through canonicalization is stronger than
// a maximized region: set it in the original (e.g. mo -> ro-MD).
if w.RegionID != max.RegionID {
w.RegionID = max.RegionID
}
// TODO: should we do the same for scripts?
// See test case: en, sr, nl ; sh ; sr
max, _ = max.Maximize()
} else {
// Base language is not defined.
if h != nil {
for i := range h.haveTags {
have := h.haveTags[i]
if equalsRest(have.tag, w) {
return have, w, Exact
}
}
}
if w.ScriptID == 0 && w.RegionID == 0 {
// We skip all tags matching und for approximate matching, including
// private tags.
continue
}
max, _ = w.Maximize()
if h = m.index[max.LangID]; h == nil {
continue
}
}
pin := true
for _, t := range want[i+1:] {
if w.LangID == t.lang() {
pin = false
break
}
}
// Check for match based on maximized tag.
for i := range h.haveTags {
have := h.haveTags[i]
best.update(have, w, max.ScriptID, max.RegionID, pin)
if best.conf == Exact {
for have.nextMax != 0 {
have = h.haveTags[have.nextMax]
best.update(have, w, max.ScriptID, max.RegionID, pin)
}
return best.have, best.want, best.conf
}
}
}
if best.conf <= No {
if len(want) != 0 {
return nil, want[0].tag(), No
}
return nil, language.Tag{}, No
}
return best.have, best.want, best.conf
}
// bestMatch accumulates the best match so far.
type bestMatch struct {
have *haveTag
want language.Tag
conf Confidence
pinnedRegion language.Region
pinLanguage bool
sameRegionGroup bool
// Cached results from applying tie-breaking rules.
origLang bool
origReg bool
paradigmReg bool
regGroupDist uint8
origScript bool
}
// update updates the existing best match if the new pair is considered to be a
// better match. To determine if the given pair is a better match, it first
// computes the rough confidence level. If this surpasses the current match, it
// will replace it and update the tie-breaker rule cache. If there is a tie, it
// proceeds with applying a series of tie-breaker rules. If there is no
// conclusive winner after applying the tie-breaker rules, it leaves the current
// match as the preferred match.
//
// If pin is true and have and tag are a strong match, it will henceforth only
// consider matches for this language. This corresponds to the nothing that most
// users have a strong preference for the first defined language. A user can
// still prefer a second language over a dialect of the preferred language by
// explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should
// be false.
func (m *bestMatch) update(have *haveTag, tag language.Tag, maxScript language.Script, maxRegion language.Region, pin bool) {
// Bail if the maximum attainable confidence is below that of the current best match.
c := have.conf
if c < m.conf {
return
}
// Don't change the language once we already have found an exact match.
if m.pinLanguage && tag.LangID != m.want.LangID {
return
}
// Pin the region group if we are comparing tags for the same language.
if tag.LangID == m.want.LangID && m.sameRegionGroup {
_, sameGroup := regionGroupDist(m.pinnedRegion, have.maxRegion, have.maxScript, m.want.LangID)
if !sameGroup {
return
}
}
if c == Exact && have.maxScript == maxScript {
// If there is another language and then another entry of this language,
// don't pin anything, otherwise pin the language.
m.pinLanguage = pin
}
if equalsRest(have.tag, tag) {
} else if have.maxScript != maxScript {
// There is usually very little comprehension between different scripts.
// In a few cases there may still be Low comprehension. This possibility
// is pre-computed and stored in have.altScript.
if Low < m.conf || have.altScript != maxScript {
return
}
c = Low
} else if have.maxRegion != maxRegion {
if High < c {
// There is usually a small difference between languages across regions.
c = High
}
}
// We store the results of the computations of the tie-breaker rules along
// with the best match. There is no need to do the checks once we determine
// we have a winner, but we do still need to do the tie-breaker computations.
// We use "beaten" to keep track if we still need to do the checks.
beaten := false // true if the new pair defeats the current one.
if c != m.conf {
if c < m.conf {
return
}
beaten = true
}
// Tie-breaker rules:
// We prefer if the pre-maximized language was specified and identical.
origLang := have.tag.LangID == tag.LangID && tag.LangID != 0
if !beaten && m.origLang != origLang {
if m.origLang {
return
}
beaten = true
}
// We prefer if the pre-maximized region was specified and identical.
origReg := have.tag.RegionID == tag.RegionID && tag.RegionID != 0
if !beaten && m.origReg != origReg {
if m.origReg {
return
}
beaten = true
}
regGroupDist, sameGroup := regionGroupDist(have.maxRegion, maxRegion, maxScript, tag.LangID)
if !beaten && m.regGroupDist != regGroupDist {
if regGroupDist > m.regGroupDist {
return
}
beaten = true
}
paradigmReg := isParadigmLocale(tag.LangID, have.maxRegion)
if !beaten && m.paradigmReg != paradigmReg {
if !paradigmReg {
return
}
beaten = true
}
// Next we prefer if the pre-maximized script was specified and identical.
origScript := have.tag.ScriptID == tag.ScriptID && tag.ScriptID != 0
if !beaten && m.origScript != origScript {
if m.origScript {
return
}
beaten = true
}
// Update m to the newly found best match.
if beaten {
m.have = have
m.want = tag
m.conf = c
m.pinnedRegion = maxRegion
m.sameRegionGroup = sameGroup
m.origLang = origLang
m.origReg = origReg
m.paradigmReg = paradigmReg
m.origScript = origScript
m.regGroupDist = regGroupDist
}
}
func isParadigmLocale(lang language.Language, r language.Region) bool {
for _, e := range paradigmLocales {
if language.Language(e[0]) == lang && (r == language.Region(e[1]) || r == language.Region(e[2])) {
return true
}
}
return false
}
// regionGroupDist computes the distance between two regions based on their
// CLDR grouping.
func regionGroupDist(a, b language.Region, script language.Script, lang language.Language) (dist uint8, same bool) {
const defaultDistance = 4
aGroup := uint(regionToGroups[a]) << 1
bGroup := uint(regionToGroups[b]) << 1
for _, ri := range matchRegion {
if language.Language(ri.lang) == lang && (ri.script == 0 || language.Script(ri.script) == script) {
group := uint(1 << (ri.group &^ 0x80))
if 0x80&ri.group == 0 {
if aGroup&bGroup&group != 0 { // Both regions are in the group.
return ri.distance, ri.distance == defaultDistance
}
} else {
if (aGroup|bGroup)&group == 0 { // Both regions are not in the group.
return ri.distance, ri.distance == defaultDistance
}
}
}
}
return defaultDistance, true
}
// equalsRest compares everything except the language.
func equalsRest(a, b language.Tag) bool {
// TODO: don't include extensions in this comparison. To do this efficiently,
// though, we should handle private tags separately.
return a.ScriptID == b.ScriptID && a.RegionID == b.RegionID && a.VariantOrPrivateUseTags() == b.VariantOrPrivateUseTags()
}
// isExactEquivalent returns true if canonicalizing the language will not alter
// the script or region of a tag.
func isExactEquivalent(l language.Language) bool {
for _, o := range notEquivalent {
if o == l {
return false
}
}
return true
}
var notEquivalent []language.Language
func init() {
// Create a list of all languages for which canonicalization may alter the
// script or region.
for _, lm := range language.AliasMap {
tag := language.Tag{LangID: language.Language(lm.From)}
if tag, _ = canonicalize(All, tag); tag.ScriptID != 0 || tag.RegionID != 0 {
notEquivalent = append(notEquivalent, language.Language(lm.From))
}
}
// Maximize undefined regions of paradigm locales.
for i, v := range paradigmLocales {
t := language.Tag{LangID: language.Language(v[0])}
max, _ := t.Maximize()
if v[1] == 0 {
paradigmLocales[i][1] = uint16(max.RegionID)
}
if v[2] == 0 {
paradigmLocales[i][2] = uint16(max.RegionID)
}
}
}

228
vendor/golang.org/x/text/language/parse.go generated vendored Normal file
View file

@ -0,0 +1,228 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"errors"
"strconv"
"strings"
"golang.org/x/text/internal/language"
)
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError interface {
error
// Subtag returns the subtag for which the error occurred.
Subtag() string
}
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
// failed it returns an error and any part of the tag that could be parsed.
// If parsing succeeded but an unknown value was found, it returns
// ValueError. The Tag returned in this case is just stripped of the unknown
// value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The resulting tag is canonicalized using the default canonicalization type.
func Parse(s string) (t Tag, err error) {
return Default.Parse(s)
}
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
// failed it returns an error and any part of the tag that could be parsed.
// If parsing succeeded but an unknown value was found, it returns
// ValueError. The Tag returned in this case is just stripped of the unknown
// value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The resulting tag is canonicalized using the canonicalization type c.
func (c CanonType) Parse(s string) (t Tag, err error) {
tt, err := language.Parse(s)
if err != nil {
return makeTag(tt), err
}
tt, changed := canonicalize(c, tt)
if changed {
tt.RemakeString()
}
return makeTag(tt), err
}
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
// Base, Script or Region or slice of type Variant or Extension is passed more
// than once, the latter will overwrite the former. Variants and Extensions are
// accumulated, but if two extensions of the same type are passed, the latter
// will replace the former. For -u extensions, though, the key-type pairs are
// added, where later values overwrite older ones. A Tag overwrites all former
// values and typically only makes sense as the first argument. The resulting
// tag is returned after canonicalizing using the Default CanonType. If one or
// more errors are encountered, one of the errors is returned.
func Compose(part ...interface{}) (t Tag, err error) {
return Default.Compose(part...)
}
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
// Base, Script or Region or slice of type Variant or Extension is passed more
// than once, the latter will overwrite the former. Variants and Extensions are
// accumulated, but if two extensions of the same type are passed, the latter
// will replace the former. For -u extensions, though, the key-type pairs are
// added, where later values overwrite older ones. A Tag overwrites all former
// values and typically only makes sense as the first argument. The resulting
// tag is returned after canonicalizing using CanonType c. If one or more errors
// are encountered, one of the errors is returned.
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
var b language.Builder
if err = update(&b, part...); err != nil {
return und, err
}
b.Tag, _ = canonicalize(c, b.Tag)
return makeTag(b.Make()), err
}
var errInvalidArgument = errors.New("invalid Extension or Variant")
func update(b *language.Builder, part ...interface{}) (err error) {
for _, x := range part {
switch v := x.(type) {
case Tag:
b.SetTag(v.tag())
case Base:
b.Tag.LangID = v.langID
case Script:
b.Tag.ScriptID = v.scriptID
case Region:
b.Tag.RegionID = v.regionID
case Variant:
if v.variant == "" {
err = errInvalidArgument
break
}
b.AddVariant(v.variant)
case Extension:
if v.s == "" {
err = errInvalidArgument
break
}
b.SetExt(v.s)
case []Variant:
b.ClearVariants()
for _, v := range v {
b.AddVariant(v.variant)
}
case []Extension:
b.ClearExtensions()
for _, e := range v {
b.SetExt(e.s)
}
// TODO: support parsing of raw strings based on morphology or just extensions?
case error:
if v != nil {
err = v
}
}
}
return
}
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
// ParseAcceptLanguage parses the contents of an Accept-Language header as
// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
// a list of corresponding quality weights. It is more permissive than RFC 2616
// and may return non-nil slices even if the input is not valid.
// The Tags will be sorted by highest weight first and then by first occurrence.
// Tags with a weight of zero will be dropped. An error will be returned if the
// input could not be parsed.
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
var entry string
for s != "" {
if entry, s = split(s, ','); entry == "" {
continue
}
entry, weight := split(entry, ';')
// Scan the language.
t, err := Parse(entry)
if err != nil {
id, ok := acceptFallback[entry]
if !ok {
return nil, nil, err
}
t = makeTag(language.Tag{LangID: id})
}
// Scan the optional weight.
w := 1.0
if weight != "" {
weight = consume(weight, 'q')
weight = consume(weight, '=')
// consume returns the empty string when a token could not be
// consumed, resulting in an error for ParseFloat.
if w, err = strconv.ParseFloat(weight, 32); err != nil {
return nil, nil, errInvalidWeight
}
// Drop tags with a quality weight of 0.
if w <= 0 {
continue
}
}
tag = append(tag, t)
q = append(q, float32(w))
}
sortStable(&tagSort{tag, q})
return tag, q, nil
}
// consume removes a leading token c from s and returns the result or the empty
// string if there is no such token.
func consume(s string, c byte) string {
if s == "" || s[0] != c {
return ""
}
return strings.TrimSpace(s[1:])
}
func split(s string, c byte) (head, tail string) {
if i := strings.IndexByte(s, c); i >= 0 {
return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:])
}
return strings.TrimSpace(s), ""
}
// Add hack mapping to deal with a small number of cases that occur
// in Accept-Language (with reasonable frequency).
var acceptFallback = map[string]language.Language{
"english": _en,
"deutsch": _de,
"italian": _it,
"french": _fr,
"*": _mul, // defined in the spec to match all languages.
}
type tagSort struct {
tag []Tag
q []float32
}
func (s *tagSort) Len() int {
return len(s.q)
}
func (s *tagSort) Less(i, j int) bool {
return s.q[i] > s.q[j]
}
func (s *tagSort) Swap(i, j int) {
s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
s.q[i], s.q[j] = s.q[j], s.q[i]
}

298
vendor/golang.org/x/text/language/tables.go generated vendored Normal file
View file

@ -0,0 +1,298 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package language
// CLDRVersion is the CLDR version from which the tables in this package are derived.
const CLDRVersion = "32"
const (
_de = 269
_en = 313
_fr = 350
_it = 505
_mo = 784
_no = 879
_nb = 839
_pt = 960
_sh = 1031
_mul = 806
_und = 0
)
const (
_001 = 1
_419 = 31
_BR = 65
_CA = 73
_ES = 110
_GB = 123
_MD = 188
_PT = 238
_UK = 306
_US = 309
_ZZ = 357
_XA = 323
_XC = 325
_XK = 333
)
const (
_Latn = 87
_Hani = 54
_Hans = 56
_Hant = 57
_Qaaa = 139
_Qaai = 147
_Qabx = 188
_Zinh = 236
_Zyyy = 241
_Zzzz = 242
)
var regionToGroups = []uint8{ // 357 elements
// Entry 0 - 3F
0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04,
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00,
0x00, 0x04, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x04,
// Entry 40 - 7F
0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x08,
0x00, 0x04, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
// Entry 80 - BF
0x00, 0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00,
0x00, 0x04, 0x01, 0x00, 0x04, 0x02, 0x00, 0x04,
0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x08, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00,
// Entry C0 - FF
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01,
0x04, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
0x00, 0x00, 0x04, 0x00, 0x05, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// Entry 100 - 13F
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x00, 0x04,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x01, 0x00, 0x05, 0x04, 0x00,
0x00, 0x04, 0x00, 0x04, 0x04, 0x05, 0x00, 0x00,
// Entry 140 - 17F
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00,
} // Size: 381 bytes
var paradigmLocales = [][3]uint16{ // 3 elements
0: [3]uint16{0x139, 0x0, 0x7b},
1: [3]uint16{0x13e, 0x0, 0x1f},
2: [3]uint16{0x3c0, 0x41, 0xee},
} // Size: 42 bytes
type mutualIntelligibility struct {
want uint16
have uint16
distance uint8
oneway bool
}
type scriptIntelligibility struct {
wantLang uint16
haveLang uint16
wantScript uint8
haveScript uint8
distance uint8
}
type regionIntelligibility struct {
lang uint16
script uint8
group uint8
distance uint8
}
// matchLang holds pairs of langIDs of base languages that are typically
// mutually intelligible. Each pair is associated with a confidence and
// whether the intelligibility goes one or both ways.
var matchLang = []mutualIntelligibility{ // 113 elements
0: {want: 0x1d1, have: 0xb7, distance: 0x4, oneway: false},
1: {want: 0x407, have: 0xb7, distance: 0x4, oneway: false},
2: {want: 0x407, have: 0x1d1, distance: 0x4, oneway: false},
3: {want: 0x407, have: 0x432, distance: 0x4, oneway: false},
4: {want: 0x43a, have: 0x1, distance: 0x4, oneway: false},
5: {want: 0x1a3, have: 0x10d, distance: 0x4, oneway: true},
6: {want: 0x295, have: 0x10d, distance: 0x4, oneway: true},
7: {want: 0x101, have: 0x36f, distance: 0x8, oneway: false},
8: {want: 0x101, have: 0x347, distance: 0x8, oneway: false},
9: {want: 0x5, have: 0x3e2, distance: 0xa, oneway: true},
10: {want: 0xd, have: 0x139, distance: 0xa, oneway: true},
11: {want: 0x16, have: 0x367, distance: 0xa, oneway: true},
12: {want: 0x21, have: 0x139, distance: 0xa, oneway: true},
13: {want: 0x56, have: 0x13e, distance: 0xa, oneway: true},
14: {want: 0x58, have: 0x3e2, distance: 0xa, oneway: true},
15: {want: 0x71, have: 0x3e2, distance: 0xa, oneway: true},
16: {want: 0x75, have: 0x139, distance: 0xa, oneway: true},
17: {want: 0x82, have: 0x1be, distance: 0xa, oneway: true},
18: {want: 0xa5, have: 0x139, distance: 0xa, oneway: true},
19: {want: 0xb2, have: 0x15e, distance: 0xa, oneway: true},
20: {want: 0xdd, have: 0x153, distance: 0xa, oneway: true},
21: {want: 0xe5, have: 0x139, distance: 0xa, oneway: true},
22: {want: 0xe9, have: 0x3a, distance: 0xa, oneway: true},
23: {want: 0xf0, have: 0x15e, distance: 0xa, oneway: true},
24: {want: 0xf9, have: 0x15e, distance: 0xa, oneway: true},
25: {want: 0x100, have: 0x139, distance: 0xa, oneway: true},
26: {want: 0x130, have: 0x139, distance: 0xa, oneway: true},
27: {want: 0x13c, have: 0x139, distance: 0xa, oneway: true},
28: {want: 0x140, have: 0x151, distance: 0xa, oneway: true},
29: {want: 0x145, have: 0x13e, distance: 0xa, oneway: true},
30: {want: 0x158, have: 0x101, distance: 0xa, oneway: true},
31: {want: 0x16d, have: 0x367, distance: 0xa, oneway: true},
32: {want: 0x16e, have: 0x139, distance: 0xa, oneway: true},
33: {want: 0x16f, have: 0x139, distance: 0xa, oneway: true},
34: {want: 0x17e, have: 0x139, distance: 0xa, oneway: true},
35: {want: 0x190, have: 0x13e, distance: 0xa, oneway: true},
36: {want: 0x194, have: 0x13e, distance: 0xa, oneway: true},
37: {want: 0x1a4, have: 0x1be, distance: 0xa, oneway: true},
38: {want: 0x1b4, have: 0x139, distance: 0xa, oneway: true},
39: {want: 0x1b8, have: 0x139, distance: 0xa, oneway: true},
40: {want: 0x1d4, have: 0x15e, distance: 0xa, oneway: true},
41: {want: 0x1d7, have: 0x3e2, distance: 0xa, oneway: true},
42: {want: 0x1d9, have: 0x139, distance: 0xa, oneway: true},
43: {want: 0x1e7, have: 0x139, distance: 0xa, oneway: true},
44: {want: 0x1f8, have: 0x139, distance: 0xa, oneway: true},
45: {want: 0x20e, have: 0x1e1, distance: 0xa, oneway: true},
46: {want: 0x210, have: 0x139, distance: 0xa, oneway: true},
47: {want: 0x22d, have: 0x15e, distance: 0xa, oneway: true},
48: {want: 0x242, have: 0x3e2, distance: 0xa, oneway: true},
49: {want: 0x24a, have: 0x139, distance: 0xa, oneway: true},
50: {want: 0x251, have: 0x139, distance: 0xa, oneway: true},
51: {want: 0x265, have: 0x139, distance: 0xa, oneway: true},
52: {want: 0x274, have: 0x48a, distance: 0xa, oneway: true},
53: {want: 0x28a, have: 0x3e2, distance: 0xa, oneway: true},
54: {want: 0x28e, have: 0x1f9, distance: 0xa, oneway: true},
55: {want: 0x2a3, have: 0x139, distance: 0xa, oneway: true},
56: {want: 0x2b5, have: 0x15e, distance: 0xa, oneway: true},
57: {want: 0x2b8, have: 0x139, distance: 0xa, oneway: true},
58: {want: 0x2be, have: 0x139, distance: 0xa, oneway: true},
59: {want: 0x2c3, have: 0x15e, distance: 0xa, oneway: true},
60: {want: 0x2ed, have: 0x139, distance: 0xa, oneway: true},
61: {want: 0x2f1, have: 0x15e, distance: 0xa, oneway: true},
62: {want: 0x2fa, have: 0x139, distance: 0xa, oneway: true},
63: {want: 0x2ff, have: 0x7e, distance: 0xa, oneway: true},
64: {want: 0x304, have: 0x139, distance: 0xa, oneway: true},
65: {want: 0x30b, have: 0x3e2, distance: 0xa, oneway: true},
66: {want: 0x31b, have: 0x1be, distance: 0xa, oneway: true},
67: {want: 0x31f, have: 0x1e1, distance: 0xa, oneway: true},
68: {want: 0x320, have: 0x139, distance: 0xa, oneway: true},
69: {want: 0x331, have: 0x139, distance: 0xa, oneway: true},
70: {want: 0x351, have: 0x139, distance: 0xa, oneway: true},
71: {want: 0x36a, have: 0x347, distance: 0xa, oneway: false},
72: {want: 0x36a, have: 0x36f, distance: 0xa, oneway: true},
73: {want: 0x37a, have: 0x139, distance: 0xa, oneway: true},
74: {want: 0x387, have: 0x139, distance: 0xa, oneway: true},
75: {want: 0x389, have: 0x139, distance: 0xa, oneway: true},
76: {want: 0x38b, have: 0x15e, distance: 0xa, oneway: true},
77: {want: 0x390, have: 0x139, distance: 0xa, oneway: true},
78: {want: 0x395, have: 0x139, distance: 0xa, oneway: true},
79: {want: 0x39d, have: 0x139, distance: 0xa, oneway: true},
80: {want: 0x3a5, have: 0x139, distance: 0xa, oneway: true},
81: {want: 0x3be, have: 0x139, distance: 0xa, oneway: true},
82: {want: 0x3c4, have: 0x13e, distance: 0xa, oneway: true},
83: {want: 0x3d4, have: 0x10d, distance: 0xa, oneway: true},
84: {want: 0x3d9, have: 0x139, distance: 0xa, oneway: true},
85: {want: 0x3e5, have: 0x15e, distance: 0xa, oneway: true},
86: {want: 0x3e9, have: 0x1be, distance: 0xa, oneway: true},
87: {want: 0x3fa, have: 0x139, distance: 0xa, oneway: true},
88: {want: 0x40c, have: 0x139, distance: 0xa, oneway: true},
89: {want: 0x423, have: 0x139, distance: 0xa, oneway: true},
90: {want: 0x429, have: 0x139, distance: 0xa, oneway: true},
91: {want: 0x431, have: 0x139, distance: 0xa, oneway: true},
92: {want: 0x43b, have: 0x139, distance: 0xa, oneway: true},
93: {want: 0x43e, have: 0x1e1, distance: 0xa, oneway: true},
94: {want: 0x445, have: 0x139, distance: 0xa, oneway: true},
95: {want: 0x450, have: 0x139, distance: 0xa, oneway: true},
96: {want: 0x461, have: 0x139, distance: 0xa, oneway: true},
97: {want: 0x467, have: 0x3e2, distance: 0xa, oneway: true},
98: {want: 0x46f, have: 0x139, distance: 0xa, oneway: true},
99: {want: 0x476, have: 0x3e2, distance: 0xa, oneway: true},
100: {want: 0x3883, have: 0x139, distance: 0xa, oneway: true},
101: {want: 0x480, have: 0x139, distance: 0xa, oneway: true},
102: {want: 0x482, have: 0x139, distance: 0xa, oneway: true},
103: {want: 0x494, have: 0x3e2, distance: 0xa, oneway: true},
104: {want: 0x49d, have: 0x139, distance: 0xa, oneway: true},
105: {want: 0x4ac, have: 0x529, distance: 0xa, oneway: true},
106: {want: 0x4b4, have: 0x139, distance: 0xa, oneway: true},
107: {want: 0x4bc, have: 0x3e2, distance: 0xa, oneway: true},
108: {want: 0x4e5, have: 0x15e, distance: 0xa, oneway: true},
109: {want: 0x4f2, have: 0x139, distance: 0xa, oneway: true},
110: {want: 0x512, have: 0x139, distance: 0xa, oneway: true},
111: {want: 0x518, have: 0x139, distance: 0xa, oneway: true},
112: {want: 0x52f, have: 0x139, distance: 0xa, oneway: true},
} // Size: 702 bytes
// matchScript holds pairs of scriptIDs where readers of one script
// can typically also read the other. Each is associated with a confidence.
var matchScript = []scriptIntelligibility{ // 26 elements
0: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x57, haveScript: 0x1f, distance: 0x5},
1: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x1f, haveScript: 0x57, distance: 0x5},
2: {wantLang: 0x58, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
3: {wantLang: 0xa5, haveLang: 0x139, wantScript: 0xe, haveScript: 0x57, distance: 0xa},
4: {wantLang: 0x1d7, haveLang: 0x3e2, wantScript: 0x8, haveScript: 0x1f, distance: 0xa},
5: {wantLang: 0x210, haveLang: 0x139, wantScript: 0x2b, haveScript: 0x57, distance: 0xa},
6: {wantLang: 0x24a, haveLang: 0x139, wantScript: 0x4b, haveScript: 0x57, distance: 0xa},
7: {wantLang: 0x251, haveLang: 0x139, wantScript: 0x4f, haveScript: 0x57, distance: 0xa},
8: {wantLang: 0x2b8, haveLang: 0x139, wantScript: 0x54, haveScript: 0x57, distance: 0xa},
9: {wantLang: 0x304, haveLang: 0x139, wantScript: 0x6b, haveScript: 0x57, distance: 0xa},
10: {wantLang: 0x331, haveLang: 0x139, wantScript: 0x72, haveScript: 0x57, distance: 0xa},
11: {wantLang: 0x351, haveLang: 0x139, wantScript: 0x21, haveScript: 0x57, distance: 0xa},
12: {wantLang: 0x395, haveLang: 0x139, wantScript: 0x7d, haveScript: 0x57, distance: 0xa},
13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x33, haveScript: 0x57, distance: 0xa},
14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xca, haveScript: 0x57, distance: 0xa},
17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xd7, haveScript: 0x57, distance: 0xa},
18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xda, haveScript: 0x57, distance: 0xa},
19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x29, haveScript: 0x57, distance: 0xa},
20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
22: {wantLang: 0x4bc, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
23: {wantLang: 0x512, haveLang: 0x139, wantScript: 0x3b, haveScript: 0x57, distance: 0xa},
24: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x38, haveScript: 0x39, distance: 0xf},
25: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x39, haveScript: 0x38, distance: 0x13},
} // Size: 232 bytes
var matchRegion = []regionIntelligibility{ // 15 elements
0: {lang: 0x3a, script: 0x0, group: 0x4, distance: 0x4},
1: {lang: 0x3a, script: 0x0, group: 0x84, distance: 0x4},
2: {lang: 0x139, script: 0x0, group: 0x1, distance: 0x4},
3: {lang: 0x139, script: 0x0, group: 0x81, distance: 0x4},
4: {lang: 0x13e, script: 0x0, group: 0x3, distance: 0x4},
5: {lang: 0x13e, script: 0x0, group: 0x83, distance: 0x4},
6: {lang: 0x3c0, script: 0x0, group: 0x3, distance: 0x4},
7: {lang: 0x3c0, script: 0x0, group: 0x83, distance: 0x4},
8: {lang: 0x529, script: 0x39, group: 0x2, distance: 0x4},
9: {lang: 0x529, script: 0x39, group: 0x82, distance: 0x4},
10: {lang: 0x3a, script: 0x0, group: 0x80, distance: 0x5},
11: {lang: 0x139, script: 0x0, group: 0x80, distance: 0x5},
12: {lang: 0x13e, script: 0x0, group: 0x80, distance: 0x5},
13: {lang: 0x3c0, script: 0x0, group: 0x80, distance: 0x5},
14: {lang: 0x529, script: 0x39, group: 0x80, distance: 0x5},
} // Size: 114 bytes
// Total table size 1471 bytes (1KiB); checksum: 4CB1CD46

145
vendor/golang.org/x/text/language/tags.go generated vendored Normal file
View file

@ -0,0 +1,145 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import "golang.org/x/text/internal/language/compact"
// TODO: Various sets of commonly use tags and regions.
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
// It simplifies safe initialization of Tag values.
func MustParse(s string) Tag {
t, err := Parse(s)
if err != nil {
panic(err)
}
return t
}
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
// It simplifies safe initialization of Tag values.
func (c CanonType) MustParse(s string) Tag {
t, err := c.Parse(s)
if err != nil {
panic(err)
}
return t
}
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
// It simplifies safe initialization of Base values.
func MustParseBase(s string) Base {
b, err := ParseBase(s)
if err != nil {
panic(err)
}
return b
}
// MustParseScript is like ParseScript, but panics if the given script cannot be
// parsed. It simplifies safe initialization of Script values.
func MustParseScript(s string) Script {
scr, err := ParseScript(s)
if err != nil {
panic(err)
}
return scr
}
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
// parsed. It simplifies safe initialization of Region values.
func MustParseRegion(s string) Region {
r, err := ParseRegion(s)
if err != nil {
panic(err)
}
return r
}
var (
und = Tag{}
Und Tag = Tag{}
Afrikaans Tag = Tag(compact.Afrikaans)
Amharic Tag = Tag(compact.Amharic)
Arabic Tag = Tag(compact.Arabic)
ModernStandardArabic Tag = Tag(compact.ModernStandardArabic)
Azerbaijani Tag = Tag(compact.Azerbaijani)
Bulgarian Tag = Tag(compact.Bulgarian)
Bengali Tag = Tag(compact.Bengali)
Catalan Tag = Tag(compact.Catalan)
Czech Tag = Tag(compact.Czech)
Danish Tag = Tag(compact.Danish)
German Tag = Tag(compact.German)
Greek Tag = Tag(compact.Greek)
English Tag = Tag(compact.English)
AmericanEnglish Tag = Tag(compact.AmericanEnglish)
BritishEnglish Tag = Tag(compact.BritishEnglish)
Spanish Tag = Tag(compact.Spanish)
EuropeanSpanish Tag = Tag(compact.EuropeanSpanish)
LatinAmericanSpanish Tag = Tag(compact.LatinAmericanSpanish)
Estonian Tag = Tag(compact.Estonian)
Persian Tag = Tag(compact.Persian)
Finnish Tag = Tag(compact.Finnish)
Filipino Tag = Tag(compact.Filipino)
French Tag = Tag(compact.French)
CanadianFrench Tag = Tag(compact.CanadianFrench)
Gujarati Tag = Tag(compact.Gujarati)
Hebrew Tag = Tag(compact.Hebrew)
Hindi Tag = Tag(compact.Hindi)
Croatian Tag = Tag(compact.Croatian)
Hungarian Tag = Tag(compact.Hungarian)
Armenian Tag = Tag(compact.Armenian)
Indonesian Tag = Tag(compact.Indonesian)
Icelandic Tag = Tag(compact.Icelandic)
Italian Tag = Tag(compact.Italian)
Japanese Tag = Tag(compact.Japanese)
Georgian Tag = Tag(compact.Georgian)
Kazakh Tag = Tag(compact.Kazakh)
Khmer Tag = Tag(compact.Khmer)
Kannada Tag = Tag(compact.Kannada)
Korean Tag = Tag(compact.Korean)
Kirghiz Tag = Tag(compact.Kirghiz)
Lao Tag = Tag(compact.Lao)
Lithuanian Tag = Tag(compact.Lithuanian)
Latvian Tag = Tag(compact.Latvian)
Macedonian Tag = Tag(compact.Macedonian)
Malayalam Tag = Tag(compact.Malayalam)
Mongolian Tag = Tag(compact.Mongolian)
Marathi Tag = Tag(compact.Marathi)
Malay Tag = Tag(compact.Malay)
Burmese Tag = Tag(compact.Burmese)
Nepali Tag = Tag(compact.Nepali)
Dutch Tag = Tag(compact.Dutch)
Norwegian Tag = Tag(compact.Norwegian)
Punjabi Tag = Tag(compact.Punjabi)
Polish Tag = Tag(compact.Polish)
Portuguese Tag = Tag(compact.Portuguese)
BrazilianPortuguese Tag = Tag(compact.BrazilianPortuguese)
EuropeanPortuguese Tag = Tag(compact.EuropeanPortuguese)
Romanian Tag = Tag(compact.Romanian)
Russian Tag = Tag(compact.Russian)
Sinhala Tag = Tag(compact.Sinhala)
Slovak Tag = Tag(compact.Slovak)
Slovenian Tag = Tag(compact.Slovenian)
Albanian Tag = Tag(compact.Albanian)
Serbian Tag = Tag(compact.Serbian)
SerbianLatin Tag = Tag(compact.SerbianLatin)
Swedish Tag = Tag(compact.Swedish)
Swahili Tag = Tag(compact.Swahili)
Tamil Tag = Tag(compact.Tamil)
Telugu Tag = Tag(compact.Telugu)
Thai Tag = Tag(compact.Thai)
Turkish Tag = Tag(compact.Turkish)
Ukrainian Tag = Tag(compact.Ukrainian)
Urdu Tag = Tag(compact.Urdu)
Uzbek Tag = Tag(compact.Uzbek)
Vietnamese Tag = Tag(compact.Vietnamese)
Chinese Tag = Tag(compact.Chinese)
SimplifiedChinese Tag = Tag(compact.SimplifiedChinese)
TraditionalChinese Tag = Tag(compact.TraditionalChinese)
Zulu Tag = Tag(compact.Zulu)
)

336
vendor/golang.org/x/text/secure/bidirule/bidirule.go generated vendored Normal file
View file

@ -0,0 +1,336 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package bidirule implements the Bidi Rule defined by RFC 5893.
//
// This package is under development. The API may change without notice and
// without preserving backward compatibility.
package bidirule
import (
"errors"
"unicode/utf8"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/bidi"
)
// This file contains an implementation of RFC 5893: Right-to-Left Scripts for
// Internationalized Domain Names for Applications (IDNA)
//
// A label is an individual component of a domain name. Labels are usually
// shown separated by dots; for example, the domain name "www.example.com" is
// composed of three labels: "www", "example", and "com".
//
// An RTL label is a label that contains at least one character of class R, AL,
// or AN. An LTR label is any label that is not an RTL label.
//
// A "Bidi domain name" is a domain name that contains at least one RTL label.
//
// The following guarantees can be made based on the above:
//
// o In a domain name consisting of only labels that satisfy the rule,
// the requirements of Section 3 are satisfied. Note that even LTR
// labels and pure ASCII labels have to be tested.
//
// o In a domain name consisting of only LDH labels (as defined in the
// Definitions document [RFC5890]) and labels that satisfy the rule,
// the requirements of Section 3 are satisfied as long as a label
// that starts with an ASCII digit does not come after a
// right-to-left label.
//
// No guarantee is given for other combinations.
// ErrInvalid indicates a label is invalid according to the Bidi Rule.
var ErrInvalid = errors.New("bidirule: failed Bidi Rule")
type ruleState uint8
const (
ruleInitial ruleState = iota
ruleLTR
ruleLTRFinal
ruleRTL
ruleRTLFinal
ruleInvalid
)
type ruleTransition struct {
next ruleState
mask uint16
}
var transitions = [...][2]ruleTransition{
// [2.1] The first character must be a character with Bidi property L, R, or
// AL. If it has the R or AL property, it is an RTL label; if it has the L
// property, it is an LTR label.
ruleInitial: {
{ruleLTRFinal, 1 << bidi.L},
{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL},
},
ruleRTL: {
// [2.3] In an RTL label, the end of the label must be a character with
// Bidi property R, AL, EN, or AN, followed by zero or more characters
// with Bidi property NSM.
{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN},
// [2.2] In an RTL label, only characters with the Bidi properties R,
// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
// We exclude the entries from [2.3]
{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
},
ruleRTLFinal: {
// [2.3] In an RTL label, the end of the label must be a character with
// Bidi property R, AL, EN, or AN, followed by zero or more characters
// with Bidi property NSM.
{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN | 1<<bidi.NSM},
// [2.2] In an RTL label, only characters with the Bidi properties R,
// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
// We exclude the entries from [2.3] and NSM.
{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
},
ruleLTR: {
// [2.6] In an LTR label, the end of the label must be a character with
// Bidi property L or EN, followed by zero or more characters with Bidi
// property NSM.
{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN},
// [2.5] In an LTR label, only characters with the Bidi properties L,
// EN, ES, CS, ET, ON, BN, or NSM are allowed.
// We exclude the entries from [2.6].
{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
},
ruleLTRFinal: {
// [2.6] In an LTR label, the end of the label must be a character with
// Bidi property L or EN, followed by zero or more characters with Bidi
// property NSM.
{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN | 1<<bidi.NSM},
// [2.5] In an LTR label, only characters with the Bidi properties L,
// EN, ES, CS, ET, ON, BN, or NSM are allowed.
// We exclude the entries from [2.6].
{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
},
ruleInvalid: {
{ruleInvalid, 0},
{ruleInvalid, 0},
},
}
// [2.4] In an RTL label, if an EN is present, no AN may be present, and
// vice versa.
const exclusiveRTL = uint16(1<<bidi.EN | 1<<bidi.AN)
// From RFC 5893
// An RTL label is a label that contains at least one character of type
// R, AL, or AN.
//
// An LTR label is any label that is not an RTL label.
// Direction reports the direction of the given label as defined by RFC 5893.
// The Bidi Rule does not have to be applied to labels of the category
// LeftToRight.
func Direction(b []byte) bidi.Direction {
for i := 0; i < len(b); {
e, sz := bidi.Lookup(b[i:])
if sz == 0 {
i++
}
c := e.Class()
if c == bidi.R || c == bidi.AL || c == bidi.AN {
return bidi.RightToLeft
}
i += sz
}
return bidi.LeftToRight
}
// DirectionString reports the direction of the given label as defined by RFC
// 5893. The Bidi Rule does not have to be applied to labels of the category
// LeftToRight.
func DirectionString(s string) bidi.Direction {
for i := 0; i < len(s); {
e, sz := bidi.LookupString(s[i:])
if sz == 0 {
i++
continue
}
c := e.Class()
if c == bidi.R || c == bidi.AL || c == bidi.AN {
return bidi.RightToLeft
}
i += sz
}
return bidi.LeftToRight
}
// Valid reports whether b conforms to the BiDi rule.
func Valid(b []byte) bool {
var t Transformer
if n, ok := t.advance(b); !ok || n < len(b) {
return false
}
return t.isFinal()
}
// ValidString reports whether s conforms to the BiDi rule.
func ValidString(s string) bool {
var t Transformer
if n, ok := t.advanceString(s); !ok || n < len(s) {
return false
}
return t.isFinal()
}
// New returns a Transformer that verifies that input adheres to the Bidi Rule.
func New() *Transformer {
return &Transformer{}
}
// Transformer implements transform.Transform.
type Transformer struct {
state ruleState
hasRTL bool
seen uint16
}
// A rule can only be violated for "Bidi Domain names", meaning if one of the
// following categories has been observed.
func (t *Transformer) isRTL() bool {
const isRTL = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN
return t.seen&isRTL != 0
}
// Reset implements transform.Transformer.
func (t *Transformer) Reset() { *t = Transformer{} }
// Transform implements transform.Transformer. This Transformer has state and
// needs to be reset between uses.
func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if len(dst) < len(src) {
src = src[:len(dst)]
atEOF = false
err = transform.ErrShortDst
}
n, err1 := t.Span(src, atEOF)
copy(dst, src[:n])
if err == nil || err1 != nil && err1 != transform.ErrShortSrc {
err = err1
}
return n, n, err
}
// Span returns the first n bytes of src that conform to the Bidi rule.
func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error) {
if t.state == ruleInvalid && t.isRTL() {
return 0, ErrInvalid
}
n, ok := t.advance(src)
switch {
case !ok:
err = ErrInvalid
case n < len(src):
if !atEOF {
err = transform.ErrShortSrc
break
}
err = ErrInvalid
case !t.isFinal():
err = ErrInvalid
}
return n, err
}
// Precomputing the ASCII values decreases running time for the ASCII fast path
// by about 30%.
var asciiTable [128]bidi.Properties
func init() {
for i := range asciiTable {
p, _ := bidi.LookupRune(rune(i))
asciiTable[i] = p
}
}
func (t *Transformer) advance(s []byte) (n int, ok bool) {
var e bidi.Properties
var sz int
for n < len(s) {
if s[n] < utf8.RuneSelf {
e, sz = asciiTable[s[n]], 1
} else {
e, sz = bidi.Lookup(s[n:])
if sz <= 1 {
if sz == 1 {
// We always consider invalid UTF-8 to be invalid, even if
// the string has not yet been determined to be RTL.
// TODO: is this correct?
return n, false
}
return n, true // incomplete UTF-8 encoding
}
}
// TODO: using CompactClass would result in noticeable speedup.
// See unicode/bidi/prop.go:Properties.CompactClass.
c := uint16(1 << e.Class())
t.seen |= c
if t.seen&exclusiveRTL == exclusiveRTL {
t.state = ruleInvalid
return n, false
}
switch tr := transitions[t.state]; {
case tr[0].mask&c != 0:
t.state = tr[0].next
case tr[1].mask&c != 0:
t.state = tr[1].next
default:
t.state = ruleInvalid
if t.isRTL() {
return n, false
}
}
n += sz
}
return n, true
}
func (t *Transformer) advanceString(s string) (n int, ok bool) {
var e bidi.Properties
var sz int
for n < len(s) {
if s[n] < utf8.RuneSelf {
e, sz = asciiTable[s[n]], 1
} else {
e, sz = bidi.LookupString(s[n:])
if sz <= 1 {
if sz == 1 {
return n, false // invalid UTF-8
}
return n, true // incomplete UTF-8 encoding
}
}
// TODO: using CompactClass results in noticeable speedup.
// See unicode/bidi/prop.go:Properties.CompactClass.
c := uint16(1 << e.Class())
t.seen |= c
if t.seen&exclusiveRTL == exclusiveRTL {
t.state = ruleInvalid
return n, false
}
switch tr := transitions[t.state]; {
case tr[0].mask&c != 0:
t.state = tr[0].next
case tr[1].mask&c != 0:
t.state = tr[1].next
default:
t.state = ruleInvalid
if t.isRTL() {
return n, false
}
}
n += sz
}
return n, true
}

View file

@ -0,0 +1,11 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build go1.10
package bidirule
func (t *Transformer) isFinal() bool {
return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
}

View file

@ -0,0 +1,14 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !go1.10
package bidirule
func (t *Transformer) isFinal() bool {
if !t.isRTL() {
return true
}
return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
}

705
vendor/golang.org/x/text/transform/transform.go generated vendored Normal file
View file

@ -0,0 +1,705 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package transform provides reader and writer wrappers that transform the
// bytes passing through as well as various transformations. Example
// transformations provided by other packages include normalization and
// conversion between character sets.
package transform // import "golang.org/x/text/transform"
import (
"bytes"
"errors"
"io"
"unicode/utf8"
)
var (
// ErrShortDst means that the destination buffer was too short to
// receive all of the transformed bytes.
ErrShortDst = errors.New("transform: short destination buffer")
// ErrShortSrc means that the source buffer has insufficient data to
// complete the transformation.
ErrShortSrc = errors.New("transform: short source buffer")
// ErrEndOfSpan means that the input and output (the transformed input)
// are not identical.
ErrEndOfSpan = errors.New("transform: input and output are not identical")
// errInconsistentByteCount means that Transform returned success (nil
// error) but also returned nSrc inconsistent with the src argument.
errInconsistentByteCount = errors.New("transform: inconsistent byte count returned")
// errShortInternal means that an internal buffer is not large enough
// to make progress and the Transform operation must be aborted.
errShortInternal = errors.New("transform: short internal buffer")
)
// Transformer transforms bytes.
type Transformer interface {
// Transform writes to dst the transformed bytes read from src, and
// returns the number of dst bytes written and src bytes read. The
// atEOF argument tells whether src represents the last bytes of the
// input.
//
// Callers should always process the nDst bytes produced and account
// for the nSrc bytes consumed before considering the error err.
//
// A nil error means that all of the transformed bytes (whether freshly
// transformed from src or left over from previous Transform calls)
// were written to dst. A nil error can be returned regardless of
// whether atEOF is true. If err is nil then nSrc must equal len(src);
// the converse is not necessarily true.
//
// ErrShortDst means that dst was too short to receive all of the
// transformed bytes. ErrShortSrc means that src had insufficient data
// to complete the transformation. If both conditions apply, then
// either error may be returned. Other than the error conditions listed
// here, implementations are free to report other errors that arise.
Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
// Reset resets the state and allows a Transformer to be reused.
Reset()
}
// SpanningTransformer extends the Transformer interface with a Span method
// that determines how much of the input already conforms to the Transformer.
type SpanningTransformer interface {
Transformer
// Span returns a position in src such that transforming src[:n] results in
// identical output src[:n] for these bytes. It does not necessarily return
// the largest such n. The atEOF argument tells whether src represents the
// last bytes of the input.
//
// Callers should always account for the n bytes consumed before
// considering the error err.
//
// A nil error means that all input bytes are known to be identical to the
// output produced by the Transformer. A nil error can be returned
// regardless of whether atEOF is true. If err is nil, then n must
// equal len(src); the converse is not necessarily true.
//
// ErrEndOfSpan means that the Transformer output may differ from the
// input after n bytes. Note that n may be len(src), meaning that the output
// would contain additional bytes after otherwise identical output.
// ErrShortSrc means that src had insufficient data to determine whether the
// remaining bytes would change. Other than the error conditions listed
// here, implementations are free to report other errors that arise.
//
// Calling Span can modify the Transformer state as a side effect. In
// effect, it does the transformation just as calling Transform would, only
// without copying to a destination buffer and only up to a point it can
// determine the input and output bytes are the same. This is obviously more
// limited than calling Transform, but can be more efficient in terms of
// copying and allocating buffers. Calls to Span and Transform may be
// interleaved.
Span(src []byte, atEOF bool) (n int, err error)
}
// NopResetter can be embedded by implementations of Transformer to add a nop
// Reset method.
type NopResetter struct{}
// Reset implements the Reset method of the Transformer interface.
func (NopResetter) Reset() {}
// Reader wraps another io.Reader by transforming the bytes read.
type Reader struct {
r io.Reader
t Transformer
err error
// dst[dst0:dst1] contains bytes that have been transformed by t but
// not yet copied out via Read.
dst []byte
dst0, dst1 int
// src[src0:src1] contains bytes that have been read from r but not
// yet transformed through t.
src []byte
src0, src1 int
// transformComplete is whether the transformation is complete,
// regardless of whether or not it was successful.
transformComplete bool
}
const defaultBufSize = 4096
// NewReader returns a new Reader that wraps r by transforming the bytes read
// via t. It calls Reset on t.
func NewReader(r io.Reader, t Transformer) *Reader {
t.Reset()
return &Reader{
r: r,
t: t,
dst: make([]byte, defaultBufSize),
src: make([]byte, defaultBufSize),
}
}
// Read implements the io.Reader interface.
func (r *Reader) Read(p []byte) (int, error) {
n, err := 0, error(nil)
for {
// Copy out any transformed bytes and return the final error if we are done.
if r.dst0 != r.dst1 {
n = copy(p, r.dst[r.dst0:r.dst1])
r.dst0 += n
if r.dst0 == r.dst1 && r.transformComplete {
return n, r.err
}
return n, nil
} else if r.transformComplete {
return 0, r.err
}
// Try to transform some source bytes, or to flush the transformer if we
// are out of source bytes. We do this even if r.r.Read returned an error.
// As the io.Reader documentation says, "process the n > 0 bytes returned
// before considering the error".
if r.src0 != r.src1 || r.err != nil {
r.dst0 = 0
r.dst1, n, err = r.t.Transform(r.dst, r.src[r.src0:r.src1], r.err == io.EOF)
r.src0 += n
switch {
case err == nil:
if r.src0 != r.src1 {
r.err = errInconsistentByteCount
}
// The Transform call was successful; we are complete if we
// cannot read more bytes into src.
r.transformComplete = r.err != nil
continue
case err == ErrShortDst && (r.dst1 != 0 || n != 0):
// Make room in dst by copying out, and try again.
continue
case err == ErrShortSrc && r.src1-r.src0 != len(r.src) && r.err == nil:
// Read more bytes into src via the code below, and try again.
default:
r.transformComplete = true
// The reader error (r.err) takes precedence over the
// transformer error (err) unless r.err is nil or io.EOF.
if r.err == nil || r.err == io.EOF {
r.err = err
}
continue
}
}
// Move any untransformed source bytes to the start of the buffer
// and read more bytes.
if r.src0 != 0 {
r.src0, r.src1 = 0, copy(r.src, r.src[r.src0:r.src1])
}
n, r.err = r.r.Read(r.src[r.src1:])
r.src1 += n
}
}
// TODO: implement ReadByte (and ReadRune??).
// Writer wraps another io.Writer by transforming the bytes read.
// The user needs to call Close to flush unwritten bytes that may
// be buffered.
type Writer struct {
w io.Writer
t Transformer
dst []byte
// src[:n] contains bytes that have not yet passed through t.
src []byte
n int
}
// NewWriter returns a new Writer that wraps w by transforming the bytes written
// via t. It calls Reset on t.
func NewWriter(w io.Writer, t Transformer) *Writer {
t.Reset()
return &Writer{
w: w,
t: t,
dst: make([]byte, defaultBufSize),
src: make([]byte, defaultBufSize),
}
}
// Write implements the io.Writer interface. If there are not enough
// bytes available to complete a Transform, the bytes will be buffered
// for the next write. Call Close to convert the remaining bytes.
func (w *Writer) Write(data []byte) (n int, err error) {
src := data
if w.n > 0 {
// Append bytes from data to the last remainder.
// TODO: limit the amount copied on first try.
n = copy(w.src[w.n:], data)
w.n += n
src = w.src[:w.n]
}
for {
nDst, nSrc, err := w.t.Transform(w.dst, src, false)
if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
return n, werr
}
src = src[nSrc:]
if w.n == 0 {
n += nSrc
} else if len(src) <= n {
// Enough bytes from w.src have been consumed. We make src point
// to data instead to reduce the copying.
w.n = 0
n -= len(src)
src = data[n:]
if n < len(data) && (err == nil || err == ErrShortSrc) {
continue
}
}
switch err {
case ErrShortDst:
// This error is okay as long as we are making progress.
if nDst > 0 || nSrc > 0 {
continue
}
case ErrShortSrc:
if len(src) < len(w.src) {
m := copy(w.src, src)
// If w.n > 0, bytes from data were already copied to w.src and n
// was already set to the number of bytes consumed.
if w.n == 0 {
n += m
}
w.n = m
err = nil
} else if nDst > 0 || nSrc > 0 {
// Not enough buffer to store the remainder. Keep processing as
// long as there is progress. Without this case, transforms that
// require a lookahead larger than the buffer may result in an
// error. This is not something one may expect to be common in
// practice, but it may occur when buffers are set to small
// sizes during testing.
continue
}
case nil:
if w.n > 0 {
err = errInconsistentByteCount
}
}
return n, err
}
}
// Close implements the io.Closer interface.
func (w *Writer) Close() error {
src := w.src[:w.n]
for {
nDst, nSrc, err := w.t.Transform(w.dst, src, true)
if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
return werr
}
if err != ErrShortDst {
return err
}
src = src[nSrc:]
}
}
type nop struct{ NopResetter }
func (nop) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
n := copy(dst, src)
if n < len(src) {
err = ErrShortDst
}
return n, n, err
}
func (nop) Span(src []byte, atEOF bool) (n int, err error) {
return len(src), nil
}
type discard struct{ NopResetter }
func (discard) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return 0, len(src), nil
}
var (
// Discard is a Transformer for which all Transform calls succeed
// by consuming all bytes and writing nothing.
Discard Transformer = discard{}
// Nop is a SpanningTransformer that copies src to dst.
Nop SpanningTransformer = nop{}
)
// chain is a sequence of links. A chain with N Transformers has N+1 links and
// N+1 buffers. Of those N+1 buffers, the first and last are the src and dst
// buffers given to chain.Transform and the middle N-1 buffers are intermediate
// buffers owned by the chain. The i'th link transforms bytes from the i'th
// buffer chain.link[i].b at read offset chain.link[i].p to the i+1'th buffer
// chain.link[i+1].b at write offset chain.link[i+1].n, for i in [0, N).
type chain struct {
link []link
err error
// errStart is the index at which the error occurred plus 1. Processing
// errStart at this level at the next call to Transform. As long as
// errStart > 0, chain will not consume any more source bytes.
errStart int
}
func (c *chain) fatalError(errIndex int, err error) {
if i := errIndex + 1; i > c.errStart {
c.errStart = i
c.err = err
}
}
type link struct {
t Transformer
// b[p:n] holds the bytes to be transformed by t.
b []byte
p int
n int
}
func (l *link) src() []byte {
return l.b[l.p:l.n]
}
func (l *link) dst() []byte {
return l.b[l.n:]
}
// Chain returns a Transformer that applies t in sequence.
func Chain(t ...Transformer) Transformer {
if len(t) == 0 {
return nop{}
}
c := &chain{link: make([]link, len(t)+1)}
for i, tt := range t {
c.link[i].t = tt
}
// Allocate intermediate buffers.
b := make([][defaultBufSize]byte, len(t)-1)
for i := range b {
c.link[i+1].b = b[i][:]
}
return c
}
// Reset resets the state of Chain. It calls Reset on all the Transformers.
func (c *chain) Reset() {
for i, l := range c.link {
if l.t != nil {
l.t.Reset()
}
c.link[i].p, c.link[i].n = 0, 0
}
}
// TODO: make chain use Span (is going to be fun to implement!)
// Transform applies the transformers of c in sequence.
func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
// Set up src and dst in the chain.
srcL := &c.link[0]
dstL := &c.link[len(c.link)-1]
srcL.b, srcL.p, srcL.n = src, 0, len(src)
dstL.b, dstL.n = dst, 0
var lastFull, needProgress bool // for detecting progress
// i is the index of the next Transformer to apply, for i in [low, high].
// low is the lowest index for which c.link[low] may still produce bytes.
// high is the highest index for which c.link[high] has a Transformer.
// The error returned by Transform determines whether to increase or
// decrease i. We try to completely fill a buffer before converting it.
for low, i, high := c.errStart, c.errStart, len(c.link)-2; low <= i && i <= high; {
in, out := &c.link[i], &c.link[i+1]
nDst, nSrc, err0 := in.t.Transform(out.dst(), in.src(), atEOF && low == i)
out.n += nDst
in.p += nSrc
if i > 0 && in.p == in.n {
in.p, in.n = 0, 0
}
needProgress, lastFull = lastFull, false
switch err0 {
case ErrShortDst:
// Process the destination buffer next. Return if we are already
// at the high index.
if i == high {
return dstL.n, srcL.p, ErrShortDst
}
if out.n != 0 {
i++
// If the Transformer at the next index is not able to process any
// source bytes there is nothing that can be done to make progress
// and the bytes will remain unprocessed. lastFull is used to
// detect this and break out of the loop with a fatal error.
lastFull = true
continue
}
// The destination buffer was too small, but is completely empty.
// Return a fatal error as this transformation can never complete.
c.fatalError(i, errShortInternal)
case ErrShortSrc:
if i == 0 {
// Save ErrShortSrc in err. All other errors take precedence.
err = ErrShortSrc
break
}
// Source bytes were depleted before filling up the destination buffer.
// Verify we made some progress, move the remaining bytes to the errStart
// and try to get more source bytes.
if needProgress && nSrc == 0 || in.n-in.p == len(in.b) {
// There were not enough source bytes to proceed while the source
// buffer cannot hold any more bytes. Return a fatal error as this
// transformation can never complete.
c.fatalError(i, errShortInternal)
break
}
// in.b is an internal buffer and we can make progress.
in.p, in.n = 0, copy(in.b, in.src())
fallthrough
case nil:
// if i == low, we have depleted the bytes at index i or any lower levels.
// In that case we increase low and i. In all other cases we decrease i to
// fetch more bytes before proceeding to the next index.
if i > low {
i--
continue
}
default:
c.fatalError(i, err0)
}
// Exhausted level low or fatal error: increase low and continue
// to process the bytes accepted so far.
i++
low = i
}
// If c.errStart > 0, this means we found a fatal error. We will clear
// all upstream buffers. At this point, no more progress can be made
// downstream, as Transform would have bailed while handling ErrShortDst.
if c.errStart > 0 {
for i := 1; i < c.errStart; i++ {
c.link[i].p, c.link[i].n = 0, 0
}
err, c.errStart, c.err = c.err, 0, nil
}
return dstL.n, srcL.p, err
}
// Deprecated: Use runes.Remove instead.
func RemoveFunc(f func(r rune) bool) Transformer {
return removeF(f)
}
type removeF func(r rune) bool
func (removeF) Reset() {}
// Transform implements the Transformer interface.
func (t removeF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for r, sz := rune(0), 0; len(src) > 0; src = src[sz:] {
if r = rune(src[0]); r < utf8.RuneSelf {
sz = 1
} else {
r, sz = utf8.DecodeRune(src)
if sz == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src) {
err = ErrShortSrc
break
}
// We replace illegal bytes with RuneError. Not doing so might
// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
// The resulting byte sequence may subsequently contain runes
// for which t(r) is true that were passed unnoticed.
if !t(r) {
if nDst+3 > len(dst) {
err = ErrShortDst
break
}
nDst += copy(dst[nDst:], "\uFFFD")
}
nSrc++
continue
}
}
if !t(r) {
if nDst+sz > len(dst) {
err = ErrShortDst
break
}
nDst += copy(dst[nDst:], src[:sz])
}
nSrc += sz
}
return
}
// grow returns a new []byte that is longer than b, and copies the first n bytes
// of b to the start of the new slice.
func grow(b []byte, n int) []byte {
m := len(b)
if m <= 32 {
m = 64
} else if m <= 256 {
m *= 2
} else {
m += m >> 1
}
buf := make([]byte, m)
copy(buf, b[:n])
return buf
}
const initialBufSize = 128
// String returns a string with the result of converting s[:n] using t, where
// n <= len(s). If err == nil, n will be len(s). It calls Reset on t.
func String(t Transformer, s string) (result string, n int, err error) {
t.Reset()
if s == "" {
// Fast path for the common case for empty input. Results in about a
// 86% reduction of running time for BenchmarkStringLowerEmpty.
if _, _, err := t.Transform(nil, nil, true); err == nil {
return "", 0, nil
}
}
// Allocate only once. Note that both dst and src escape when passed to
// Transform.
buf := [2 * initialBufSize]byte{}
dst := buf[:initialBufSize:initialBufSize]
src := buf[initialBufSize : 2*initialBufSize]
// The input string s is transformed in multiple chunks (starting with a
// chunk size of initialBufSize). nDst and nSrc are per-chunk (or
// per-Transform-call) indexes, pDst and pSrc are overall indexes.
nDst, nSrc := 0, 0
pDst, pSrc := 0, 0
// pPrefix is the length of a common prefix: the first pPrefix bytes of the
// result will equal the first pPrefix bytes of s. It is not guaranteed to
// be the largest such value, but if pPrefix, len(result) and len(s) are
// all equal after the final transform (i.e. calling Transform with atEOF
// being true returned nil error) then we don't need to allocate a new
// result string.
pPrefix := 0
for {
// Invariant: pDst == pPrefix && pSrc == pPrefix.
n := copy(src, s[pSrc:])
nDst, nSrc, err = t.Transform(dst, src[:n], pSrc+n == len(s))
pDst += nDst
pSrc += nSrc
// TODO: let transformers implement an optional Spanner interface, akin
// to norm's QuickSpan. This would even allow us to avoid any allocation.
if !bytes.Equal(dst[:nDst], src[:nSrc]) {
break
}
pPrefix = pSrc
if err == ErrShortDst {
// A buffer can only be short if a transformer modifies its input.
break
} else if err == ErrShortSrc {
if nSrc == 0 {
// No progress was made.
break
}
// Equal so far and !atEOF, so continue checking.
} else if err != nil || pPrefix == len(s) {
return string(s[:pPrefix]), pPrefix, err
}
}
// Post-condition: pDst == pPrefix + nDst && pSrc == pPrefix + nSrc.
// We have transformed the first pSrc bytes of the input s to become pDst
// transformed bytes. Those transformed bytes are discontiguous: the first
// pPrefix of them equal s[:pPrefix] and the last nDst of them equal
// dst[:nDst]. We copy them around, into a new dst buffer if necessary, so
// that they become one contiguous slice: dst[:pDst].
if pPrefix != 0 {
newDst := dst
if pDst > len(newDst) {
newDst = make([]byte, len(s)+nDst-nSrc)
}
copy(newDst[pPrefix:pDst], dst[:nDst])
copy(newDst[:pPrefix], s[:pPrefix])
dst = newDst
}
// Prevent duplicate Transform calls with atEOF being true at the end of
// the input. Also return if we have an unrecoverable error.
if (err == nil && pSrc == len(s)) ||
(err != nil && err != ErrShortDst && err != ErrShortSrc) {
return string(dst[:pDst]), pSrc, err
}
// Transform the remaining input, growing dst and src buffers as necessary.
for {
n := copy(src, s[pSrc:])
nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], pSrc+n == len(s))
pDst += nDst
pSrc += nSrc
// If we got ErrShortDst or ErrShortSrc, do not grow as long as we can
// make progress. This may avoid excessive allocations.
if err == ErrShortDst {
if nDst == 0 {
dst = grow(dst, pDst)
}
} else if err == ErrShortSrc {
if nSrc == 0 {
src = grow(src, 0)
}
} else if err != nil || pSrc == len(s) {
return string(dst[:pDst]), pSrc, err
}
}
}
// Bytes returns a new byte slice with the result of converting b[:n] using t,
// where n <= len(b). If err == nil, n will be len(b). It calls Reset on t.
func Bytes(t Transformer, b []byte) (result []byte, n int, err error) {
return doAppend(t, 0, make([]byte, len(b)), b)
}
// Append appends the result of converting src[:n] using t to dst, where
// n <= len(src), If err == nil, n will be len(src). It calls Reset on t.
func Append(t Transformer, dst, src []byte) (result []byte, n int, err error) {
if len(dst) == cap(dst) {
n := len(src) + len(dst) // It is okay for this to be 0.
b := make([]byte, n)
dst = b[:copy(b, dst)]
}
return doAppend(t, len(dst), dst[:cap(dst)], src)
}
func doAppend(t Transformer, pDst int, dst, src []byte) (result []byte, n int, err error) {
t.Reset()
pSrc := 0
for {
nDst, nSrc, err := t.Transform(dst[pDst:], src[pSrc:], true)
pDst += nDst
pSrc += nSrc
if err != ErrShortDst {
return dst[:pDst], pSrc, err
}
// Grow the destination buffer, but do not grow as long as we can make
// progress. This may avoid excessive allocations.
if nDst == 0 {
dst = grow(dst, pDst)
}
}
}

198
vendor/golang.org/x/text/unicode/bidi/bidi.go generated vendored Normal file
View file

@ -0,0 +1,198 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_trieval.go gen_ranges.go
// Package bidi contains functionality for bidirectional text support.
//
// See https://www.unicode.org/reports/tr9.
//
// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
// and without notice.
package bidi // import "golang.org/x/text/unicode/bidi"
// TODO:
// The following functionality would not be hard to implement, but hinges on
// the definition of a Segmenter interface. For now this is up to the user.
// - Iterate over paragraphs
// - Segmenter to iterate over runs directly from a given text.
// Also:
// - Transformer for reordering?
// - Transformer (validator, really) for Bidi Rule.
// This API tries to avoid dealing with embedding levels for now. Under the hood
// these will be computed, but the question is to which extent the user should
// know they exist. We should at some point allow the user to specify an
// embedding hierarchy, though.
// A Direction indicates the overall flow of text.
type Direction int
const (
// LeftToRight indicates the text contains no right-to-left characters and
// that either there are some left-to-right characters or the option
// DefaultDirection(LeftToRight) was passed.
LeftToRight Direction = iota
// RightToLeft indicates the text contains no left-to-right characters and
// that either there are some right-to-left characters or the option
// DefaultDirection(RightToLeft) was passed.
RightToLeft
// Mixed indicates text contains both left-to-right and right-to-left
// characters.
Mixed
// Neutral means that text contains no left-to-right and right-to-left
// characters and that no default direction has been set.
Neutral
)
type options struct{}
// An Option is an option for Bidi processing.
type Option func(*options)
// ICU allows the user to define embedding levels. This may be used, for example,
// to use hierarchical structure of markup languages to define embeddings.
// The following option may be a way to expose this functionality in this API.
// // LevelFunc sets a function that associates nesting levels with the given text.
// // The levels function will be called with monotonically increasing values for p.
// func LevelFunc(levels func(p int) int) Option {
// panic("unimplemented")
// }
// DefaultDirection sets the default direction for a Paragraph. The direction is
// overridden if the text contains directional characters.
func DefaultDirection(d Direction) Option {
panic("unimplemented")
}
// A Paragraph holds a single Paragraph for Bidi processing.
type Paragraph struct {
// buffers
}
// SetBytes configures p for the given paragraph text. It replaces text
// previously set by SetBytes or SetString. If b contains a paragraph separator
// it will only process the first paragraph and report the number of bytes
// consumed from b including this separator. Error may be non-nil if options are
// given.
func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
panic("unimplemented")
}
// SetString configures p for the given paragraph text. It replaces text
// previously set by SetBytes or SetString. If b contains a paragraph separator
// it will only process the first paragraph and report the number of bytes
// consumed from b including this separator. Error may be non-nil if options are
// given.
func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
panic("unimplemented")
}
// IsLeftToRight reports whether the principle direction of rendering for this
// paragraphs is left-to-right. If this returns false, the principle direction
// of rendering is right-to-left.
func (p *Paragraph) IsLeftToRight() bool {
panic("unimplemented")
}
// Direction returns the direction of the text of this paragraph.
//
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
func (p *Paragraph) Direction() Direction {
panic("unimplemented")
}
// RunAt reports the Run at the given position of the input text.
//
// This method can be used for computing line breaks on paragraphs.
func (p *Paragraph) RunAt(pos int) Run {
panic("unimplemented")
}
// Order computes the visual ordering of all the runs in a Paragraph.
func (p *Paragraph) Order() (Ordering, error) {
panic("unimplemented")
}
// Line computes the visual ordering of runs for a single line starting and
// ending at the given positions in the original text.
func (p *Paragraph) Line(start, end int) (Ordering, error) {
panic("unimplemented")
}
// An Ordering holds the computed visual order of runs of a Paragraph. Calling
// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
// The methods of an Ordering should only be called by one goroutine at a time.
type Ordering struct{}
// Direction reports the directionality of the runs.
//
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
func (o *Ordering) Direction() Direction {
panic("unimplemented")
}
// NumRuns returns the number of runs.
func (o *Ordering) NumRuns() int {
panic("unimplemented")
}
// Run returns the ith run within the ordering.
func (o *Ordering) Run(i int) Run {
panic("unimplemented")
}
// TODO: perhaps with options.
// // Reorder creates a reader that reads the runes in visual order per character.
// // Modifiers remain after the runes they modify.
// func (l *Runs) Reorder() io.Reader {
// panic("unimplemented")
// }
// A Run is a continuous sequence of characters of a single direction.
type Run struct {
}
// String returns the text of the run in its original order.
func (r *Run) String() string {
panic("unimplemented")
}
// Bytes returns the text of the run in its original order.
func (r *Run) Bytes() []byte {
panic("unimplemented")
}
// TODO: methods for
// - Display order
// - headers and footers
// - bracket replacement.
// Direction reports the direction of the run.
func (r *Run) Direction() Direction {
panic("unimplemented")
}
// Position of the Run within the text passed to SetBytes or SetString of the
// originating Paragraph value.
func (r *Run) Pos() (start, end int) {
panic("unimplemented")
}
// AppendReverse reverses the order of characters of in, appends them to out,
// and returns the result. Modifiers will still follow the runes they modify.
// Brackets are replaced with their counterparts.
func AppendReverse(out, in []byte) []byte {
panic("unimplemented")
}
// ReverseString reverses the order of characters in s and returns a new string.
// Modifiers will still follow the runes they modify. Brackets are replaced with
// their counterparts.
func ReverseString(s string) string {
panic("unimplemented")
}

335
vendor/golang.org/x/text/unicode/bidi/bracket.go generated vendored Normal file
View file

@ -0,0 +1,335 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bidi
import (
"container/list"
"fmt"
"sort"
)
// This file contains a port of the reference implementation of the
// Bidi Parentheses Algorithm:
// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
//
// The implementation in this file covers definitions BD14-BD16 and rule N0
// of UAX#9.
//
// Some preprocessing is done for each rune before data is passed to this
// algorithm:
// - opening and closing brackets are identified
// - a bracket pair type, like '(' and ')' is assigned a unique identifier that
// is identical for the opening and closing bracket. It is left to do these
// mappings.
// - The BPA algorithm requires that bracket characters that are canonical
// equivalents of each other be able to be substituted for each other.
// It is the responsibility of the caller to do this canonicalization.
//
// In implementing BD16, this implementation departs slightly from the "logical"
// algorithm defined in UAX#9. In particular, the stack referenced there
// supports operations that go beyond a "basic" stack. An equivalent
// implementation based on a linked list is used here.
// Bidi_Paired_Bracket_Type
// BD14. An opening paired bracket is a character whose
// Bidi_Paired_Bracket_Type property value is Open.
//
// BD15. A closing paired bracket is a character whose
// Bidi_Paired_Bracket_Type property value is Close.
type bracketType byte
const (
bpNone bracketType = iota
bpOpen
bpClose
)
// bracketPair holds a pair of index values for opening and closing bracket
// location of a bracket pair.
type bracketPair struct {
opener int
closer int
}
func (b *bracketPair) String() string {
return fmt.Sprintf("(%v, %v)", b.opener, b.closer)
}
// bracketPairs is a slice of bracketPairs with a sort.Interface implementation.
type bracketPairs []bracketPair
func (b bracketPairs) Len() int { return len(b) }
func (b bracketPairs) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
func (b bracketPairs) Less(i, j int) bool { return b[i].opener < b[j].opener }
// resolvePairedBrackets runs the paired bracket part of the UBA algorithm.
//
// For each rune, it takes the indexes into the original string, the class the
// bracket type (in pairTypes) and the bracket identifier (pairValues). It also
// takes the direction type for the start-of-sentence and the embedding level.
//
// The identifiers for bracket types are the rune of the canonicalized opening
// bracket for brackets (open or close) or 0 for runes that are not brackets.
func resolvePairedBrackets(s *isolatingRunSequence) {
p := bracketPairer{
sos: s.sos,
openers: list.New(),
codesIsolatedRun: s.types,
indexes: s.indexes,
}
dirEmbed := L
if s.level&1 != 0 {
dirEmbed = R
}
p.locateBrackets(s.p.pairTypes, s.p.pairValues)
p.resolveBrackets(dirEmbed, s.p.initialTypes)
}
type bracketPairer struct {
sos Class // direction corresponding to start of sequence
// The following is a restatement of BD 16 using non-algorithmic language.
//
// A bracket pair is a pair of characters consisting of an opening
// paired bracket and a closing paired bracket such that the
// Bidi_Paired_Bracket property value of the former equals the latter,
// subject to the following constraints.
// - both characters of a pair occur in the same isolating run sequence
// - the closing character of a pair follows the opening character
// - any bracket character can belong at most to one pair, the earliest possible one
// - any bracket character not part of a pair is treated like an ordinary character
// - pairs may nest properly, but their spans may not overlap otherwise
// Bracket characters with canonical decompositions are supposed to be
// treated as if they had been normalized, to allow normalized and non-
// normalized text to give the same result. In this implementation that step
// is pushed out to the caller. The caller has to ensure that the pairValue
// slices contain the rune of the opening bracket after normalization for
// any opening or closing bracket.
openers *list.List // list of positions for opening brackets
// bracket pair positions sorted by location of opening bracket
pairPositions bracketPairs
codesIsolatedRun []Class // directional bidi codes for an isolated run
indexes []int // array of index values into the original string
}
// matchOpener reports whether characters at given positions form a matching
// bracket pair.
func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool {
return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]]
}
const maxPairingDepth = 63
// locateBrackets locates matching bracket pairs according to BD16.
//
// This implementation uses a linked list instead of a stack, because, while
// elements are added at the front (like a push) they are not generally removed
// in atomic 'pop' operations, reducing the benefit of the stack archetype.
func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) {
// traverse the run
// do that explicitly (not in a for-each) so we can record position
for i, index := range p.indexes {
// look at the bracket type for each character
if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON {
// continue scanning
continue
}
switch pairTypes[index] {
case bpOpen:
// check if maximum pairing depth reached
if p.openers.Len() == maxPairingDepth {
p.openers.Init()
return
}
// remember opener location, most recent first
p.openers.PushFront(i)
case bpClose:
// see if there is a match
count := 0
for elem := p.openers.Front(); elem != nil; elem = elem.Next() {
count++
opener := elem.Value.(int)
if p.matchOpener(pairValues, opener, i) {
// if the opener matches, add nested pair to the ordered list
p.pairPositions = append(p.pairPositions, bracketPair{opener, i})
// remove up to and including matched opener
for ; count > 0; count-- {
p.openers.Remove(p.openers.Front())
}
break
}
}
sort.Sort(p.pairPositions)
// if we get here, the closing bracket matched no openers
// and gets ignored
}
}
}
// Bracket pairs within an isolating run sequence are processed as units so
// that both the opening and the closing paired bracket in a pair resolve to
// the same direction.
//
// N0. Process bracket pairs in an isolating run sequence sequentially in
// the logical order of the text positions of the opening paired brackets
// using the logic given below. Within this scope, bidirectional types EN
// and AN are treated as R.
//
// Identify the bracket pairs in the current isolating run sequence
// according to BD16. For each bracket-pair element in the list of pairs of
// text positions:
//
// a Inspect the bidirectional types of the characters enclosed within the
// bracket pair.
//
// b If any strong type (either L or R) matching the embedding direction is
// found, set the type for both brackets in the pair to match the embedding
// direction.
//
// o [ e ] o -> o e e e o
//
// o [ o e ] -> o e o e e
//
// o [ NI e ] -> o e NI e e
//
// c Otherwise, if a strong type (opposite the embedding direction) is
// found, test for adjacent strong types as follows: 1 First, check
// backwards before the opening paired bracket until the first strong type
// (L, R, or sos) is found. If that first preceding strong type is opposite
// the embedding direction, then set the type for both brackets in the pair
// to that type. 2 Otherwise, set the type for both brackets in the pair to
// the embedding direction.
//
// o [ o ] e -> o o o o e
//
// o [ o NI ] o -> o o o NI o o
//
// e [ o ] o -> e e o e o
//
// e [ o ] e -> e e o e e
//
// e ( o [ o ] NI ) e -> e e o o o o NI e e
//
// d Otherwise, do not set the type for the current bracket pair. Note that
// if the enclosed text contains no strong types the paired brackets will
// both resolve to the same level when resolved individually using rules N1
// and N2.
//
// e ( NI ) o -> e ( NI ) o
// getStrongTypeN0 maps character's directional code to strong type as required
// by rule N0.
//
// TODO: have separate type for "strong" directionality.
func (p *bracketPairer) getStrongTypeN0(index int) Class {
switch p.codesIsolatedRun[index] {
// in the scope of N0, number types are treated as R
case EN, AN, AL, R:
return R
case L:
return L
default:
return ON
}
}
// classifyPairContent reports the strong types contained inside a Bracket Pair,
// assuming the given embedding direction.
//
// It returns ON if no strong type is found. If a single strong type is found,
// it returns this type. Otherwise it returns the embedding direction.
//
// TODO: use separate type for "strong" directionality.
func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class {
dirOpposite := ON
for i := loc.opener + 1; i < loc.closer; i++ {
dir := p.getStrongTypeN0(i)
if dir == ON {
continue
}
if dir == dirEmbed {
return dir // type matching embedding direction found
}
dirOpposite = dir
}
// return ON if no strong type found, or class opposite to dirEmbed
return dirOpposite
}
// classBeforePair determines which strong types are present before a Bracket
// Pair. Return R or L if strong type found, otherwise ON.
func (p *bracketPairer) classBeforePair(loc bracketPair) Class {
for i := loc.opener - 1; i >= 0; i-- {
if dir := p.getStrongTypeN0(i); dir != ON {
return dir
}
}
// no strong types found, return sos
return p.sos
}
// assignBracketType implements rule N0 for a single bracket pair.
func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) {
// rule "N0, a", inspect contents of pair
dirPair := p.classifyPairContent(loc, dirEmbed)
// dirPair is now L, R, or N (no strong type found)
// the following logical tests are performed out of order compared to
// the statement of the rules but yield the same results
if dirPair == ON {
return // case "d" - nothing to do
}
if dirPair != dirEmbed {
// case "c": strong type found, opposite - check before (c.1)
dirPair = p.classBeforePair(loc)
if dirPair == dirEmbed || dirPair == ON {
// no strong opposite type found before - use embedding (c.2)
dirPair = dirEmbed
}
}
// else: case "b", strong type found matching embedding,
// no explicit action needed, as dirPair is already set to embedding
// direction
// set the bracket types to the type found
p.setBracketsToType(loc, dirPair, initialTypes)
}
func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) {
p.codesIsolatedRun[loc.opener] = dirPair
p.codesIsolatedRun[loc.closer] = dirPair
for i := loc.opener + 1; i < loc.closer; i++ {
index := p.indexes[i]
if initialTypes[index] != NSM {
break
}
p.codesIsolatedRun[i] = dirPair
}
for i := loc.closer + 1; i < len(p.indexes); i++ {
index := p.indexes[i]
if initialTypes[index] != NSM {
break
}
p.codesIsolatedRun[i] = dirPair
}
}
// resolveBrackets implements rule N0 for a list of pairs.
func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) {
for _, loc := range p.pairPositions {
p.assignBracketType(loc, dirEmbed, initialTypes)
}
}

1058
vendor/golang.org/x/text/unicode/bidi/core.go generated vendored Normal file

File diff suppressed because it is too large Load diff

133
vendor/golang.org/x/text/unicode/bidi/gen.go generated vendored Normal file
View file

@ -0,0 +1,133 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"flag"
"log"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/triegen"
"golang.org/x/text/internal/ucd"
)
var outputFile = flag.String("out", "tables.go", "output file")
func main() {
gen.Init()
gen.Repackage("gen_trieval.go", "trieval.go", "bidi")
gen.Repackage("gen_ranges.go", "ranges_test.go", "bidi")
genTables()
}
// bidiClass names and codes taken from class "bc" in
// https://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt
var bidiClass = map[string]Class{
"AL": AL, // ArabicLetter
"AN": AN, // ArabicNumber
"B": B, // ParagraphSeparator
"BN": BN, // BoundaryNeutral
"CS": CS, // CommonSeparator
"EN": EN, // EuropeanNumber
"ES": ES, // EuropeanSeparator
"ET": ET, // EuropeanTerminator
"L": L, // LeftToRight
"NSM": NSM, // NonspacingMark
"ON": ON, // OtherNeutral
"R": R, // RightToLeft
"S": S, // SegmentSeparator
"WS": WS, // WhiteSpace
"FSI": Control,
"PDF": Control,
"PDI": Control,
"LRE": Control,
"LRI": Control,
"LRO": Control,
"RLE": Control,
"RLI": Control,
"RLO": Control,
}
func genTables() {
if numClass > 0x0F {
log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass)
}
w := gen.NewCodeWriter()
defer w.WriteVersionedGoFile(*outputFile, "bidi")
gen.WriteUnicodeVersion(w)
t := triegen.NewTrie("bidi")
// Build data about bracket mapping. These bits need to be or-ed with
// any other bits.
orMask := map[rune]uint64{}
xorMap := map[rune]int{}
xorMasks := []rune{0} // First value is no-op.
ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
r1 := p.Rune(0)
r2 := p.Rune(1)
xor := r1 ^ r2
if _, ok := xorMap[xor]; !ok {
xorMap[xor] = len(xorMasks)
xorMasks = append(xorMasks, xor)
}
entry := uint64(xorMap[xor]) << xorMaskShift
switch p.String(2) {
case "o":
entry |= openMask
case "c", "n":
default:
log.Fatalf("Unknown bracket class %q.", p.String(2))
}
orMask[r1] = entry
})
w.WriteComment(`
xorMasks contains masks to be xor-ed with brackets to get the reverse
version.`)
w.WriteVar("xorMasks", xorMasks)
done := map[rune]bool{}
insert := func(r rune, c Class) {
if !done[r] {
t.Insert(r, orMask[r]|uint64(c))
done[r] = true
}
}
// Insert the derived BiDi properties.
ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
class, ok := bidiClass[p.String(1)]
if !ok {
log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1))
}
insert(r, class)
})
visitDefaults(insert)
// TODO: use sparse blocks. This would reduce table size considerably
// from the looks of it.
sz, err := t.Gen(w)
if err != nil {
log.Fatal(err)
}
w.Size += sz
}
// dummy values to make methods in gen_common compile. The real versions
// will be generated by this file to tables.go.
var (
xorMasks []rune
)

57
vendor/golang.org/x/text/unicode/bidi/gen_ranges.go generated vendored Normal file
View file

@ -0,0 +1,57 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"unicode"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/unicode/rangetable"
)
// These tables are hand-extracted from:
// https://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedBidiClass.txt
func visitDefaults(fn func(r rune, c Class)) {
// first write default values for ranges listed above.
visitRunes(fn, AL, []rune{
0x0600, 0x07BF, // Arabic
0x08A0, 0x08FF, // Arabic Extended-A
0xFB50, 0xFDCF, // Arabic Presentation Forms
0xFDF0, 0xFDFF,
0xFE70, 0xFEFF,
0x0001EE00, 0x0001EEFF, // Arabic Mathematical Alpha Symbols
})
visitRunes(fn, R, []rune{
0x0590, 0x05FF, // Hebrew
0x07C0, 0x089F, // Nko et al.
0xFB1D, 0xFB4F,
0x00010800, 0x00010FFF, // Cypriot Syllabary et. al.
0x0001E800, 0x0001EDFF,
0x0001EF00, 0x0001EFFF,
})
visitRunes(fn, ET, []rune{ // European Terminator
0x20A0, 0x20Cf, // Currency symbols
})
rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) {
fn(r, BN) // Boundary Neutral
})
ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
if p.String(1) == "Default_Ignorable_Code_Point" {
fn(p.Rune(0), BN) // Boundary Neutral
}
})
}
func visitRunes(fn func(r rune, c Class), c Class, runes []rune) {
for i := 0; i < len(runes); i += 2 {
lo, hi := runes[i], runes[i+1]
for j := lo; j <= hi; j++ {
fn(j, c)
}
}
}

64
vendor/golang.org/x/text/unicode/bidi/gen_trieval.go generated vendored Normal file
View file

@ -0,0 +1,64 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// Class is the Unicode BiDi class. Each rune has a single class.
type Class uint
const (
L Class = iota // LeftToRight
R // RightToLeft
EN // EuropeanNumber
ES // EuropeanSeparator
ET // EuropeanTerminator
AN // ArabicNumber
CS // CommonSeparator
B // ParagraphSeparator
S // SegmentSeparator
WS // WhiteSpace
ON // OtherNeutral
BN // BoundaryNeutral
NSM // NonspacingMark
AL // ArabicLetter
Control // Control LRO - PDI
numClass
LRO // LeftToRightOverride
RLO // RightToLeftOverride
LRE // LeftToRightEmbedding
RLE // RightToLeftEmbedding
PDF // PopDirectionalFormat
LRI // LeftToRightIsolate
RLI // RightToLeftIsolate
FSI // FirstStrongIsolate
PDI // PopDirectionalIsolate
unknownClass = ^Class(0)
)
var controlToClass = map[rune]Class{
0x202D: LRO, // LeftToRightOverride,
0x202E: RLO, // RightToLeftOverride,
0x202A: LRE, // LeftToRightEmbedding,
0x202B: RLE, // RightToLeftEmbedding,
0x202C: PDF, // PopDirectionalFormat,
0x2066: LRI, // LeftToRightIsolate,
0x2067: RLI, // RightToLeftIsolate,
0x2068: FSI, // FirstStrongIsolate,
0x2069: PDI, // PopDirectionalIsolate,
}
// A trie entry has the following bits:
// 7..5 XOR mask for brackets
// 4 1: Bracket open, 0: Bracket close
// 3..0 Class type
const (
openMask = 0x10
xorMaskShift = 5
)

206
vendor/golang.org/x/text/unicode/bidi/prop.go generated vendored Normal file
View file

@ -0,0 +1,206 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bidi
import "unicode/utf8"
// Properties provides access to BiDi properties of runes.
type Properties struct {
entry uint8
last uint8
}
var trie = newBidiTrie(0)
// TODO: using this for bidirule reduces the running time by about 5%. Consider
// if this is worth exposing or if we can find a way to speed up the Class
// method.
//
// // CompactClass is like Class, but maps all of the BiDi control classes
// // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
// func (p Properties) CompactClass() Class {
// return Class(p.entry & 0x0F)
// }
// Class returns the Bidi class for p.
func (p Properties) Class() Class {
c := Class(p.entry & 0x0F)
if c == Control {
c = controlByteToClass[p.last&0xF]
}
return c
}
// IsBracket reports whether the rune is a bracket.
func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }
// IsOpeningBracket reports whether the rune is an opening bracket.
// IsBracket must return true.
func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }
// TODO: find a better API and expose.
func (p Properties) reverseBracket(r rune) rune {
return xorMasks[p.entry>>xorMaskShift] ^ r
}
var controlByteToClass = [16]Class{
0xD: LRO, // U+202D LeftToRightOverride,
0xE: RLO, // U+202E RightToLeftOverride,
0xA: LRE, // U+202A LeftToRightEmbedding,
0xB: RLE, // U+202B RightToLeftEmbedding,
0xC: PDF, // U+202C PopDirectionalFormat,
0x6: LRI, // U+2066 LeftToRightIsolate,
0x7: RLI, // U+2067 RightToLeftIsolate,
0x8: FSI, // U+2068 FirstStrongIsolate,
0x9: PDI, // U+2069 PopDirectionalIsolate,
}
// LookupRune returns properties for r.
func LookupRune(r rune) (p Properties, size int) {
var buf [4]byte
n := utf8.EncodeRune(buf[:], r)
return Lookup(buf[:n])
}
// TODO: these lookup methods are based on the generated trie code. The returned
// sizes have slightly different semantics from the generated code, in that it
// always returns size==1 for an illegal UTF-8 byte (instead of the length
// of the maximum invalid subsequence). Most Transformers, like unicode/norm,
// leave invalid UTF-8 untouched, in which case it has performance benefits to
// do so (without changing the semantics). Bidi requires the semantics used here
// for the bidirule implementation to be compatible with the Go semantics.
// They ultimately should perhaps be adopted by all trie implementations, for
// convenience sake.
// This unrolled code also boosts performance of the secure/bidirule package by
// about 30%.
// So, to remove this code:
// - add option to trie generator to define return type.
// - always return 1 byte size for ill-formed UTF-8 runes.
// Lookup returns properties for the first rune in s and the width in bytes of
// its encoding. The size will be 0 if s does not hold enough bytes to complete
// the encoding.
func Lookup(s []byte) (p Properties, sz int) {
c0 := s[0]
switch {
case c0 < 0x80: // is ASCII
return Properties{entry: bidiValues[c0]}, 1
case c0 < 0xC2:
return Properties{}, 1
case c0 < 0xE0: // 2-byte UTF-8
if len(s) < 2 {
return Properties{}, 0
}
i := bidiIndex[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return Properties{}, 1
}
return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
case c0 < 0xF0: // 3-byte UTF-8
if len(s) < 3 {
return Properties{}, 0
}
i := bidiIndex[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return Properties{}, 1
}
o := uint32(i)<<6 + uint32(c1)
i = bidiIndex[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return Properties{}, 1
}
return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
case c0 < 0xF8: // 4-byte UTF-8
if len(s) < 4 {
return Properties{}, 0
}
i := bidiIndex[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return Properties{}, 1
}
o := uint32(i)<<6 + uint32(c1)
i = bidiIndex[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return Properties{}, 1
}
o = uint32(i)<<6 + uint32(c2)
i = bidiIndex[o]
c3 := s[3]
if c3 < 0x80 || 0xC0 <= c3 {
return Properties{}, 1
}
return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
}
// Illegal rune
return Properties{}, 1
}
// LookupString returns properties for the first rune in s and the width in
// bytes of its encoding. The size will be 0 if s does not hold enough bytes to
// complete the encoding.
func LookupString(s string) (p Properties, sz int) {
c0 := s[0]
switch {
case c0 < 0x80: // is ASCII
return Properties{entry: bidiValues[c0]}, 1
case c0 < 0xC2:
return Properties{}, 1
case c0 < 0xE0: // 2-byte UTF-8
if len(s) < 2 {
return Properties{}, 0
}
i := bidiIndex[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return Properties{}, 1
}
return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
case c0 < 0xF0: // 3-byte UTF-8
if len(s) < 3 {
return Properties{}, 0
}
i := bidiIndex[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return Properties{}, 1
}
o := uint32(i)<<6 + uint32(c1)
i = bidiIndex[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return Properties{}, 1
}
return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
case c0 < 0xF8: // 4-byte UTF-8
if len(s) < 4 {
return Properties{}, 0
}
i := bidiIndex[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return Properties{}, 1
}
o := uint32(i)<<6 + uint32(c1)
i = bidiIndex[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return Properties{}, 1
}
o = uint32(i)<<6 + uint32(c2)
i = bidiIndex[o]
c3 := s[3]
if c3 < 0x80 || 0xC0 <= c3 {
return Properties{}, 1
}
return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
}
// Illegal rune
return Properties{}, 1
}

1815
vendor/golang.org/x/text/unicode/bidi/tables10.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load diff

1887
vendor/golang.org/x/text/unicode/bidi/tables11.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load diff

1781
vendor/golang.org/x/text/unicode/bidi/tables9.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load diff

60
vendor/golang.org/x/text/unicode/bidi/trieval.go generated vendored Normal file
View file

@ -0,0 +1,60 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package bidi
// Class is the Unicode BiDi class. Each rune has a single class.
type Class uint
const (
L Class = iota // LeftToRight
R // RightToLeft
EN // EuropeanNumber
ES // EuropeanSeparator
ET // EuropeanTerminator
AN // ArabicNumber
CS // CommonSeparator
B // ParagraphSeparator
S // SegmentSeparator
WS // WhiteSpace
ON // OtherNeutral
BN // BoundaryNeutral
NSM // NonspacingMark
AL // ArabicLetter
Control // Control LRO - PDI
numClass
LRO // LeftToRightOverride
RLO // RightToLeftOverride
LRE // LeftToRightEmbedding
RLE // RightToLeftEmbedding
PDF // PopDirectionalFormat
LRI // LeftToRightIsolate
RLI // RightToLeftIsolate
FSI // FirstStrongIsolate
PDI // PopDirectionalIsolate
unknownClass = ^Class(0)
)
var controlToClass = map[rune]Class{
0x202D: LRO, // LeftToRightOverride,
0x202E: RLO, // RightToLeftOverride,
0x202A: LRE, // LeftToRightEmbedding,
0x202B: RLE, // RightToLeftEmbedding,
0x202C: PDF, // PopDirectionalFormat,
0x2066: LRI, // LeftToRightIsolate,
0x2067: RLI, // RightToLeftIsolate,
0x2068: FSI, // FirstStrongIsolate,
0x2069: PDI, // PopDirectionalIsolate,
}
// A trie entry has the following bits:
// 7..5 XOR mask for brackets
// 4 1: Bracket open, 0: Bracket close
// 3..0 Class type
const (
openMask = 0x10
xorMaskShift = 5
)

105
vendor/golang.org/x/text/unicode/cldr/base.go generated vendored Normal file
View file

@ -0,0 +1,105 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
import (
"encoding/xml"
"regexp"
"strconv"
)
// Elem is implemented by every XML element.
type Elem interface {
setEnclosing(Elem)
setName(string)
enclosing() Elem
GetCommon() *Common
}
type hidden struct {
CharData string `xml:",chardata"`
Alias *struct {
Common
Source string `xml:"source,attr"`
Path string `xml:"path,attr"`
} `xml:"alias"`
Def *struct {
Common
Choice string `xml:"choice,attr,omitempty"`
Type string `xml:"type,attr,omitempty"`
} `xml:"default"`
}
// Common holds several of the most common attributes and sub elements
// of an XML element.
type Common struct {
XMLName xml.Name
name string
enclElem Elem
Type string `xml:"type,attr,omitempty"`
Reference string `xml:"reference,attr,omitempty"`
Alt string `xml:"alt,attr,omitempty"`
ValidSubLocales string `xml:"validSubLocales,attr,omitempty"`
Draft string `xml:"draft,attr,omitempty"`
hidden
}
// Default returns the default type to select from the enclosed list
// or "" if no default value is specified.
func (e *Common) Default() string {
if e.Def == nil {
return ""
}
if e.Def.Choice != "" {
return e.Def.Choice
} else if e.Def.Type != "" {
// Type is still used by the default element in collation.
return e.Def.Type
}
return ""
}
// Element returns the XML element name.
func (e *Common) Element() string {
return e.name
}
// GetCommon returns e. It is provided such that Common implements Elem.
func (e *Common) GetCommon() *Common {
return e
}
// Data returns the character data accumulated for this element.
func (e *Common) Data() string {
e.CharData = charRe.ReplaceAllStringFunc(e.CharData, replaceUnicode)
return e.CharData
}
func (e *Common) setName(s string) {
e.name = s
}
func (e *Common) enclosing() Elem {
return e.enclElem
}
func (e *Common) setEnclosing(en Elem) {
e.enclElem = en
}
// Escape characters that can be escaped without further escaping the string.
var charRe = regexp.MustCompile(`&#x[0-9a-fA-F]*;|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\[abtnvfr]`)
// replaceUnicode converts hexadecimal Unicode codepoint notations to a one-rune string.
// It assumes the input string is correctly formatted.
func replaceUnicode(s string) string {
if s[1] == '#' {
r, _ := strconv.ParseInt(s[3:len(s)-1], 16, 32)
return string(r)
}
r, _, _, _ := strconv.UnquoteChar(s, 0)
return string(r)
}

137
vendor/golang.org/x/text/unicode/cldr/cldr.go generated vendored Normal file
View file

@ -0,0 +1,137 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run makexml.go -output xml.go
// Package cldr provides a parser for LDML and related XML formats.
//
// This package is intended to be used by the table generation tools for the
// various packages in x/text and is not internal for historical reasons.
//
// As the XML types are generated from the CLDR DTD, and as the CLDR standard is
// periodically amended, this package may change considerably over time. This
// mostly means that data may appear and disappear between versions. That is,
// old code should keep compiling for newer versions, but data may have moved or
// changed. CLDR version 22 is the first version supported by this package.
// Older versions may not work.
package cldr // import "golang.org/x/text/unicode/cldr"
import (
"fmt"
"sort"
)
// CLDR provides access to parsed data of the Unicode Common Locale Data Repository.
type CLDR struct {
parent map[string][]string
locale map[string]*LDML
resolved map[string]*LDML
bcp47 *LDMLBCP47
supp *SupplementalData
}
func makeCLDR() *CLDR {
return &CLDR{
parent: make(map[string][]string),
locale: make(map[string]*LDML),
resolved: make(map[string]*LDML),
bcp47: &LDMLBCP47{},
supp: &SupplementalData{},
}
}
// BCP47 returns the parsed BCP47 LDML data. If no such data was parsed, nil is returned.
func (cldr *CLDR) BCP47() *LDMLBCP47 {
return nil
}
// Draft indicates the draft level of an element.
type Draft int
const (
Approved Draft = iota
Contributed
Provisional
Unconfirmed
)
var drafts = []string{"unconfirmed", "provisional", "contributed", "approved", ""}
// ParseDraft returns the Draft value corresponding to the given string. The
// empty string corresponds to Approved.
func ParseDraft(level string) (Draft, error) {
if level == "" {
return Approved, nil
}
for i, s := range drafts {
if level == s {
return Unconfirmed - Draft(i), nil
}
}
return Approved, fmt.Errorf("cldr: unknown draft level %q", level)
}
func (d Draft) String() string {
return drafts[len(drafts)-1-int(d)]
}
// SetDraftLevel sets which draft levels to include in the evaluated LDML.
// Any draft element for which the draft level is higher than lev will be excluded.
// If multiple draft levels are available for a single element, the one with the
// lowest draft level will be selected, unless preferDraft is true, in which case
// the highest draft will be chosen.
// It is assumed that the underlying LDML is canonicalized.
func (cldr *CLDR) SetDraftLevel(lev Draft, preferDraft bool) {
// TODO: implement
cldr.resolved = make(map[string]*LDML)
}
// RawLDML returns the LDML XML for id in unresolved form.
// id must be one of the strings returned by Locales.
func (cldr *CLDR) RawLDML(loc string) *LDML {
return cldr.locale[loc]
}
// LDML returns the fully resolved LDML XML for loc, which must be one of
// the strings returned by Locales.
//
// Deprecated: Use RawLDML and implement inheritance manually or using the
// internal cldrtree package.
// Inheritance has changed quite a bit since the onset of this package and in
// practice data often represented in a way where knowledge of how it was
// inherited is relevant.
func (cldr *CLDR) LDML(loc string) (*LDML, error) {
return cldr.resolve(loc)
}
// Supplemental returns the parsed supplemental data. If no such data was parsed,
// nil is returned.
func (cldr *CLDR) Supplemental() *SupplementalData {
return cldr.supp
}
// Locales returns the locales for which there exist files.
// Valid sublocales for which there is no file are not included.
// The root locale is always sorted first.
func (cldr *CLDR) Locales() []string {
loc := []string{"root"}
hasRoot := false
for l, _ := range cldr.locale {
if l == "root" {
hasRoot = true
continue
}
loc = append(loc, l)
}
sort.Strings(loc[1:])
if !hasRoot {
return loc[1:]
}
return loc
}
// Get fills in the fields of x based on the XPath path.
func Get(e Elem, path string) (res Elem, err error) {
return walkXPath(e, path)
}

359
vendor/golang.org/x/text/unicode/cldr/collate.go generated vendored Normal file
View file

@ -0,0 +1,359 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
import (
"bufio"
"encoding/xml"
"errors"
"fmt"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)
// RuleProcessor can be passed to Collator's Process method, which
// parses the rules and calls the respective method for each rule found.
type RuleProcessor interface {
Reset(anchor string, before int) error
Insert(level int, str, context, extend string) error
Index(id string)
}
const (
// cldrIndex is a Unicode-reserved sentinel value used to mark the start
// of a grouping within an index.
// We ignore any rule that starts with this rune.
// See https://unicode.org/reports/tr35/#Collation_Elements for details.
cldrIndex = "\uFDD0"
// specialAnchor is the format in which to represent logical reset positions,
// such as "first tertiary ignorable".
specialAnchor = "<%s/>"
)
// Process parses the rules for the tailorings of this collation
// and calls the respective methods of p for each rule found.
func (c Collation) Process(p RuleProcessor) (err error) {
if len(c.Cr) > 0 {
if len(c.Cr) > 1 {
return fmt.Errorf("multiple cr elements, want 0 or 1")
}
return processRules(p, c.Cr[0].Data())
}
if c.Rules.Any != nil {
return c.processXML(p)
}
return errors.New("no tailoring data")
}
// processRules parses rules in the Collation Rule Syntax defined in
// https://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Tailorings.
func processRules(p RuleProcessor, s string) (err error) {
chk := func(s string, e error) string {
if err == nil {
err = e
}
return s
}
i := 0 // Save the line number for use after the loop.
scanner := bufio.NewScanner(strings.NewReader(s))
for ; scanner.Scan() && err == nil; i++ {
for s := skipSpace(scanner.Text()); s != "" && s[0] != '#'; s = skipSpace(s) {
level := 5
var ch byte
switch ch, s = s[0], s[1:]; ch {
case '&': // followed by <anchor> or '[' <key> ']'
if s = skipSpace(s); consume(&s, '[') {
s = chk(parseSpecialAnchor(p, s))
} else {
s = chk(parseAnchor(p, 0, s))
}
case '<': // sort relation '<'{1,4}, optionally followed by '*'.
for level = 1; consume(&s, '<'); level++ {
}
if level > 4 {
err = fmt.Errorf("level %d > 4", level)
}
fallthrough
case '=': // identity relation, optionally followed by *.
if consume(&s, '*') {
s = chk(parseSequence(p, level, s))
} else {
s = chk(parseOrder(p, level, s))
}
default:
chk("", fmt.Errorf("illegal operator %q", ch))
break
}
}
}
if chk("", scanner.Err()); err != nil {
return fmt.Errorf("%d: %v", i, err)
}
return nil
}
// parseSpecialAnchor parses the anchor syntax which is either of the form
// ['before' <level>] <anchor>
// or
// [<label>]
// The starting should already be consumed.
func parseSpecialAnchor(p RuleProcessor, s string) (tail string, err error) {
i := strings.IndexByte(s, ']')
if i == -1 {
return "", errors.New("unmatched bracket")
}
a := strings.TrimSpace(s[:i])
s = s[i+1:]
if strings.HasPrefix(a, "before ") {
l, err := strconv.ParseUint(skipSpace(a[len("before "):]), 10, 3)
if err != nil {
return s, err
}
return parseAnchor(p, int(l), s)
}
return s, p.Reset(fmt.Sprintf(specialAnchor, a), 0)
}
func parseAnchor(p RuleProcessor, level int, s string) (tail string, err error) {
anchor, s, err := scanString(s)
if err != nil {
return s, err
}
return s, p.Reset(anchor, level)
}
func parseOrder(p RuleProcessor, level int, s string) (tail string, err error) {
var value, context, extend string
if value, s, err = scanString(s); err != nil {
return s, err
}
if strings.HasPrefix(value, cldrIndex) {
p.Index(value[len(cldrIndex):])
return
}
if consume(&s, '|') {
if context, s, err = scanString(s); err != nil {
return s, errors.New("missing string after context")
}
}
if consume(&s, '/') {
if extend, s, err = scanString(s); err != nil {
return s, errors.New("missing string after extension")
}
}
return s, p.Insert(level, value, context, extend)
}
// scanString scans a single input string.
func scanString(s string) (str, tail string, err error) {
if s = skipSpace(s); s == "" {
return s, s, errors.New("missing string")
}
buf := [16]byte{} // small but enough to hold most cases.
value := buf[:0]
for s != "" {
if consume(&s, '\'') {
i := strings.IndexByte(s, '\'')
if i == -1 {
return "", "", errors.New(`unmatched single quote`)
}
if i == 0 {
value = append(value, '\'')
} else {
value = append(value, s[:i]...)
}
s = s[i+1:]
continue
}
r, sz := utf8.DecodeRuneInString(s)
if unicode.IsSpace(r) || strings.ContainsRune("&<=#", r) {
break
}
value = append(value, s[:sz]...)
s = s[sz:]
}
return string(value), skipSpace(s), nil
}
func parseSequence(p RuleProcessor, level int, s string) (tail string, err error) {
if s = skipSpace(s); s == "" {
return s, errors.New("empty sequence")
}
last := rune(0)
for s != "" {
r, sz := utf8.DecodeRuneInString(s)
s = s[sz:]
if r == '-' {
// We have a range. The first element was already written.
if last == 0 {
return s, errors.New("range without starter value")
}
r, sz = utf8.DecodeRuneInString(s)
s = s[sz:]
if r == utf8.RuneError || r < last {
return s, fmt.Errorf("invalid range %q-%q", last, r)
}
for i := last + 1; i <= r; i++ {
if err := p.Insert(level, string(i), "", ""); err != nil {
return s, err
}
}
last = 0
continue
}
if unicode.IsSpace(r) || unicode.IsPunct(r) {
break
}
// normal case
if err := p.Insert(level, string(r), "", ""); err != nil {
return s, err
}
last = r
}
return s, nil
}
func skipSpace(s string) string {
return strings.TrimLeftFunc(s, unicode.IsSpace)
}
// consumes returns whether the next byte is ch. If so, it gobbles it by
// updating s.
func consume(s *string, ch byte) (ok bool) {
if *s == "" || (*s)[0] != ch {
return false
}
*s = (*s)[1:]
return true
}
// The following code parses Collation rules of CLDR version 24 and before.
var lmap = map[byte]int{
'p': 1,
's': 2,
't': 3,
'i': 5,
}
type rulesElem struct {
Rules struct {
Common
Any []*struct {
XMLName xml.Name
rule
} `xml:",any"`
} `xml:"rules"`
}
type rule struct {
Value string `xml:",chardata"`
Before string `xml:"before,attr"`
Any []*struct {
XMLName xml.Name
rule
} `xml:",any"`
}
var emptyValueError = errors.New("cldr: empty rule value")
func (r *rule) value() (string, error) {
// Convert hexadecimal Unicode codepoint notation to a string.
s := charRe.ReplaceAllStringFunc(r.Value, replaceUnicode)
r.Value = s
if s == "" {
if len(r.Any) != 1 {
return "", emptyValueError
}
r.Value = fmt.Sprintf(specialAnchor, r.Any[0].XMLName.Local)
r.Any = nil
} else if len(r.Any) != 0 {
return "", fmt.Errorf("cldr: XML elements found in collation rule: %v", r.Any)
}
return r.Value, nil
}
func (r rule) process(p RuleProcessor, name, context, extend string) error {
v, err := r.value()
if err != nil {
return err
}
switch name {
case "p", "s", "t", "i":
if strings.HasPrefix(v, cldrIndex) {
p.Index(v[len(cldrIndex):])
return nil
}
if err := p.Insert(lmap[name[0]], v, context, extend); err != nil {
return err
}
case "pc", "sc", "tc", "ic":
level := lmap[name[0]]
for _, s := range v {
if err := p.Insert(level, string(s), context, extend); err != nil {
return err
}
}
default:
return fmt.Errorf("cldr: unsupported tag: %q", name)
}
return nil
}
// processXML parses the format of CLDR versions 24 and older.
func (c Collation) processXML(p RuleProcessor) (err error) {
// Collation is generated and defined in xml.go.
var v string
for _, r := range c.Rules.Any {
switch r.XMLName.Local {
case "reset":
level := 0
switch r.Before {
case "primary", "1":
level = 1
case "secondary", "2":
level = 2
case "tertiary", "3":
level = 3
case "":
default:
return fmt.Errorf("cldr: unknown level %q", r.Before)
}
v, err = r.value()
if err == nil {
err = p.Reset(v, level)
}
case "x":
var context, extend string
for _, r1 := range r.Any {
v, err = r1.value()
switch r1.XMLName.Local {
case "context":
context = v
case "extend":
extend = v
}
}
for _, r1 := range r.Any {
if t := r1.XMLName.Local; t == "context" || t == "extend" {
continue
}
r1.rule.process(p, r1.XMLName.Local, context, extend)
}
default:
err = r.rule.process(p, r.XMLName.Local, "", "")
}
if err != nil {
return err
}
}
return nil
}

172
vendor/golang.org/x/text/unicode/cldr/decode.go generated vendored Normal file
View file

@ -0,0 +1,172 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
import (
"archive/zip"
"bytes"
"encoding/xml"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
"regexp"
)
// A Decoder loads an archive of CLDR data.
type Decoder struct {
dirFilter []string
sectionFilter []string
loader Loader
cldr *CLDR
curLocale string
}
// SetSectionFilter takes a list top-level LDML element names to which
// evaluation of LDML should be limited. It automatically calls SetDirFilter.
func (d *Decoder) SetSectionFilter(filter ...string) {
d.sectionFilter = filter
// TODO: automatically set dir filter
}
// SetDirFilter limits the loading of LDML XML files of the specied directories.
// Note that sections may be split across directories differently for different CLDR versions.
// For more robust code, use SetSectionFilter.
func (d *Decoder) SetDirFilter(dir ...string) {
d.dirFilter = dir
}
// A Loader provides access to the files of a CLDR archive.
type Loader interface {
Len() int
Path(i int) string
Reader(i int) (io.ReadCloser, error)
}
var fileRe = regexp.MustCompile(`.*[/\\](.*)[/\\](.*)\.xml`)
// Decode loads and decodes the files represented by l.
func (d *Decoder) Decode(l Loader) (cldr *CLDR, err error) {
d.cldr = makeCLDR()
for i := 0; i < l.Len(); i++ {
fname := l.Path(i)
if m := fileRe.FindStringSubmatch(fname); m != nil {
if len(d.dirFilter) > 0 && !in(d.dirFilter, m[1]) {
continue
}
var r io.ReadCloser
if r, err = l.Reader(i); err == nil {
err = d.decode(m[1], m[2], r)
r.Close()
}
if err != nil {
return nil, err
}
}
}
d.cldr.finalize(d.sectionFilter)
return d.cldr, nil
}
func (d *Decoder) decode(dir, id string, r io.Reader) error {
var v interface{}
var l *LDML
cldr := d.cldr
switch {
case dir == "supplemental":
v = cldr.supp
case dir == "transforms":
return nil
case dir == "bcp47":
v = cldr.bcp47
case dir == "validity":
return nil
default:
ok := false
if v, ok = cldr.locale[id]; !ok {
l = &LDML{}
v, cldr.locale[id] = l, l
}
}
x := xml.NewDecoder(r)
if err := x.Decode(v); err != nil {
log.Printf("%s/%s: %v", dir, id, err)
return err
}
if l != nil {
if l.Identity == nil {
return fmt.Errorf("%s/%s: missing identity element", dir, id)
}
// TODO: verify when CLDR bug https://unicode.org/cldr/trac/ticket/8970
// is resolved.
// path := strings.Split(id, "_")
// if lang := l.Identity.Language.Type; lang != path[0] {
// return fmt.Errorf("%s/%s: language was %s; want %s", dir, id, lang, path[0])
// }
}
return nil
}
type pathLoader []string
func makePathLoader(path string) (pl pathLoader, err error) {
err = filepath.Walk(path, func(path string, _ os.FileInfo, err error) error {
pl = append(pl, path)
return err
})
return pl, err
}
func (pl pathLoader) Len() int {
return len(pl)
}
func (pl pathLoader) Path(i int) string {
return pl[i]
}
func (pl pathLoader) Reader(i int) (io.ReadCloser, error) {
return os.Open(pl[i])
}
// DecodePath loads CLDR data from the given path.
func (d *Decoder) DecodePath(path string) (cldr *CLDR, err error) {
loader, err := makePathLoader(path)
if err != nil {
return nil, err
}
return d.Decode(loader)
}
type zipLoader struct {
r *zip.Reader
}
func (zl zipLoader) Len() int {
return len(zl.r.File)
}
func (zl zipLoader) Path(i int) string {
return zl.r.File[i].Name
}
func (zl zipLoader) Reader(i int) (io.ReadCloser, error) {
return zl.r.File[i].Open()
}
// DecodeZip loads CLDR data from the zip archive for which r is the source.
func (d *Decoder) DecodeZip(r io.Reader) (cldr *CLDR, err error) {
buffer, err := ioutil.ReadAll(r)
if err != nil {
return nil, err
}
archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
if err != nil {
return nil, err
}
return d.Decode(zipLoader{archive})
}

400
vendor/golang.org/x/text/unicode/cldr/makexml.go generated vendored Normal file
View file

@ -0,0 +1,400 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// This tool generates types for the various XML formats of CLDR.
package main
import (
"archive/zip"
"bytes"
"encoding/xml"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"regexp"
"strings"
"golang.org/x/text/internal/gen"
)
var outputFile = flag.String("output", "xml.go", "output file name")
func main() {
flag.Parse()
r := gen.OpenCLDRCoreZip()
buffer, err := ioutil.ReadAll(r)
if err != nil {
log.Fatal("Could not read zip file")
}
r.Close()
z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
if err != nil {
log.Fatalf("Could not read zip archive: %v", err)
}
var buf bytes.Buffer
version := gen.CLDRVersion()
for _, dtd := range files {
for _, f := range z.File {
if strings.HasSuffix(f.Name, dtd.file+".dtd") {
r, err := f.Open()
failOnError(err)
b := makeBuilder(&buf, dtd)
b.parseDTD(r)
b.resolve(b.index[dtd.top[0]])
b.write()
if b.version != "" && version != b.version {
println(f.Name)
log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
}
break
}
}
}
fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
fmt.Fprintf(&buf, "const Version = %q\n", version)
gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
}
func failOnError(err error) {
if err != nil {
log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error())
os.Exit(1)
}
}
// configuration data per DTD type
type dtd struct {
file string // base file name
root string // Go name of the root XML element
top []string // create a different type for this section
skipElem []string // hard-coded or deprecated elements
skipAttr []string // attributes to exclude
predefined []string // hard-coded elements exist of the form <name>Elem
forceRepeat []string // elements to make slices despite DTD
}
var files = []dtd{
{
file: "ldmlBCP47",
root: "LDMLBCP47",
top: []string{"ldmlBCP47"},
skipElem: []string{
"cldrVersion", // deprecated, not used
},
},
{
file: "ldmlSupplemental",
root: "SupplementalData",
top: []string{"supplementalData"},
skipElem: []string{
"cldrVersion", // deprecated, not used
},
forceRepeat: []string{
"plurals", // data defined in plurals.xml and ordinals.xml
},
},
{
file: "ldml",
root: "LDML",
top: []string{
"ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
},
skipElem: []string{
"cp", // not used anywhere
"special", // not used anywhere
"fallback", // deprecated, not used
"alias", // in Common
"default", // in Common
},
skipAttr: []string{
"hiraganaQuarternary", // typo in DTD, correct version included as well
},
predefined: []string{"rules"},
},
}
var comments = map[string]string{
"ldmlBCP47": `
// LDMLBCP47 holds information on allowable values for various variables in LDML.
`,
"supplementalData": `
// SupplementalData holds information relevant for internationalization
// and proper use of CLDR, but that is not contained in the locale hierarchy.
`,
"ldml": `
// LDML is the top-level type for locale-specific data.
`,
"collation": `
// Collation contains rules that specify a certain sort-order,
// as a tailoring of the root order.
// The parsed rules are obtained by passing a RuleProcessor to Collation's
// Process method.
`,
"calendar": `
// Calendar specifies the fields used for formatting and parsing dates and times.
// The month and quarter names are identified numerically, starting at 1.
// The day (of the week) names are identified with short strings, since there is
// no universally-accepted numeric designation.
`,
"dates": `
// Dates contains information regarding the format and parsing of dates and times.
`,
"localeDisplayNames": `
// LocaleDisplayNames specifies localized display names for scripts, languages,
// countries, currencies, and variants.
`,
"numbers": `
// Numbers supplies information for formatting and parsing numbers and currencies.
`,
}
type element struct {
name string // XML element name
category string // elements contained by this element
signature string // category + attrKey*
attr []*attribute // attributes supported by this element.
sub []struct { // parsed and evaluated sub elements of this element.
e *element
repeat bool // true if the element needs to be a slice
}
resolved bool // prevent multiple resolutions of this element.
}
type attribute struct {
name string
key string
list []string
tag string // Go tag
}
var (
reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`)
reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)
reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)
reToken = regexp.MustCompile(`\w\-`)
)
// builder is used to read in the DTD files from CLDR and generate Go code
// to be used with the encoding/xml package.
type builder struct {
w io.Writer
index map[string]*element
elem []*element
info dtd
version string
}
func makeBuilder(w io.Writer, d dtd) builder {
return builder{
w: w,
index: make(map[string]*element),
elem: []*element{},
info: d,
}
}
// parseDTD parses a DTD file.
func (b *builder) parseDTD(r io.Reader) {
for d := xml.NewDecoder(r); ; {
t, err := d.Token()
if t == nil {
break
}
failOnError(err)
dir, ok := t.(xml.Directive)
if !ok {
continue
}
m := reHead.FindSubmatch(dir)
dir = dir[len(m[0]):]
ename := string(m[2])
el, elementFound := b.index[ename]
switch string(m[1]) {
case "ELEMENT":
if elementFound {
log.Fatal("parseDTD: duplicate entry for element %q", ename)
}
m := reElem.FindSubmatch(dir)
if m == nil {
log.Fatalf("parseDTD: invalid element %q", string(dir))
}
if len(m[0]) != len(dir) {
log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0]))
}
s := string(m[1])
el = &element{
name: ename,
category: s,
}
b.index[ename] = el
case "ATTLIST":
if !elementFound {
log.Fatalf("parseDTD: unknown element %q", ename)
}
s := string(dir)
m := reAttr.FindStringSubmatch(s)
if m == nil {
log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir)))
}
if m[4] == "FIXED" {
b.version = m[5]
} else {
switch m[1] {
case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
case "type", "choice":
default:
el.attr = append(el.attr, &attribute{
name: m[1],
key: s,
list: reToken.FindAllString(m[3], -1),
})
el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
}
}
}
}
}
var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)
// resolve takes a parsed element and converts it into structured data
// that can be used to generate the XML code.
func (b *builder) resolve(e *element) {
if e.resolved {
return
}
b.elem = append(b.elem, e)
e.resolved = true
s := e.category
found := make(map[string]bool)
sequenceStart := []int{}
for len(s) > 0 {
m := reCat.FindStringSubmatch(s)
if m == nil {
log.Fatalf("%s: invalid category string %q", e.name, s)
}
repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
switch m[1] {
case "":
case "(":
sequenceStart = append(sequenceStart, len(e.sub))
case ")":
if len(sequenceStart) == 0 {
log.Fatalf("%s: unmatched closing parenthesis", e.name)
}
for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
e.sub[i].repeat = e.sub[i].repeat || repeat
}
sequenceStart = sequenceStart[:len(sequenceStart)-1]
default:
if in(b.info.skipElem, m[1]) {
} else if sub, ok := b.index[m[1]]; ok {
if !found[sub.name] {
e.sub = append(e.sub, struct {
e *element
repeat bool
}{sub, repeat})
found[sub.name] = true
b.resolve(sub)
}
} else if m[1] == "#PCDATA" || m[1] == "ANY" {
} else if m[1] != "EMPTY" {
log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
}
}
s = s[len(m[0]):]
}
}
// return true if s is contained in set.
func in(set []string, s string) bool {
for _, v := range set {
if v == s {
return true
}
}
return false
}
var repl = strings.NewReplacer("-", " ", "_", " ")
// title puts the first character or each character following '_' in title case and
// removes all occurrences of '_'.
func title(s string) string {
return strings.Replace(strings.Title(repl.Replace(s)), " ", "", -1)
}
// writeElem generates Go code for a single element, recursively.
func (b *builder) writeElem(tab int, e *element) {
p := func(f string, x ...interface{}) {
f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
fmt.Fprintf(b.w, f, x...)
}
if len(e.sub) == 0 && len(e.attr) == 0 {
p("Common")
return
}
p("struct {")
tab++
p("\nCommon")
for _, attr := range e.attr {
if !in(b.info.skipAttr, attr.name) {
p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
}
}
for _, sub := range e.sub {
if in(b.info.predefined, sub.e.name) {
p("\n%sElem", sub.e.name)
continue
}
if in(b.info.skipElem, sub.e.name) {
continue
}
p("\n%s ", title(sub.e.name))
if sub.repeat {
p("[]")
}
p("*")
if in(b.info.top, sub.e.name) {
p(title(sub.e.name))
} else {
b.writeElem(tab, sub.e)
}
p(" `xml:\"%s\"`", sub.e.name)
}
tab--
p("\n}")
}
// write generates the Go XML code.
func (b *builder) write() {
for i, name := range b.info.top {
e := b.index[name]
if e != nil {
fmt.Fprintf(b.w, comments[name])
name := title(e.name)
if i == 0 {
name = b.info.root
}
fmt.Fprintf(b.w, "type %s ", name)
b.writeElem(0, e)
fmt.Fprint(b.w, "\n")
}
}
}

602
vendor/golang.org/x/text/unicode/cldr/resolve.go generated vendored Normal file
View file

@ -0,0 +1,602 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
// This file implements the various inheritance constructs defined by LDML.
// See https://www.unicode.org/reports/tr35/#Inheritance_and_Validity
// for more details.
import (
"fmt"
"log"
"reflect"
"regexp"
"sort"
"strings"
)
// fieldIter iterates over fields in a struct. It includes
// fields of embedded structs.
type fieldIter struct {
v reflect.Value
index, n []int
}
func iter(v reflect.Value) fieldIter {
if v.Kind() != reflect.Struct {
log.Panicf("value %v must be a struct", v)
}
i := fieldIter{
v: v,
index: []int{0},
n: []int{v.NumField()},
}
i.descent()
return i
}
func (i *fieldIter) descent() {
for f := i.field(); f.Anonymous && f.Type.NumField() > 0; f = i.field() {
i.index = append(i.index, 0)
i.n = append(i.n, f.Type.NumField())
}
}
func (i *fieldIter) done() bool {
return len(i.index) == 1 && i.index[0] >= i.n[0]
}
func skip(f reflect.StructField) bool {
return !f.Anonymous && (f.Name[0] < 'A' || f.Name[0] > 'Z')
}
func (i *fieldIter) next() {
for {
k := len(i.index) - 1
i.index[k]++
if i.index[k] < i.n[k] {
if !skip(i.field()) {
break
}
} else {
if k == 0 {
return
}
i.index = i.index[:k]
i.n = i.n[:k]
}
}
i.descent()
}
func (i *fieldIter) value() reflect.Value {
return i.v.FieldByIndex(i.index)
}
func (i *fieldIter) field() reflect.StructField {
return i.v.Type().FieldByIndex(i.index)
}
type visitor func(v reflect.Value) error
var stopDescent = fmt.Errorf("do not recurse")
func (f visitor) visit(x interface{}) error {
return f.visitRec(reflect.ValueOf(x))
}
// visit recursively calls f on all nodes in v.
func (f visitor) visitRec(v reflect.Value) error {
if v.Kind() == reflect.Ptr {
if v.IsNil() {
return nil
}
return f.visitRec(v.Elem())
}
if err := f(v); err != nil {
if err == stopDescent {
return nil
}
return err
}
switch v.Kind() {
case reflect.Struct:
for i := iter(v); !i.done(); i.next() {
if err := f.visitRec(i.value()); err != nil {
return err
}
}
case reflect.Slice:
for i := 0; i < v.Len(); i++ {
if err := f.visitRec(v.Index(i)); err != nil {
return err
}
}
}
return nil
}
// getPath is used for error reporting purposes only.
func getPath(e Elem) string {
if e == nil {
return "<nil>"
}
if e.enclosing() == nil {
return e.GetCommon().name
}
if e.GetCommon().Type == "" {
return fmt.Sprintf("%s.%s", getPath(e.enclosing()), e.GetCommon().name)
}
return fmt.Sprintf("%s.%s[type=%s]", getPath(e.enclosing()), e.GetCommon().name, e.GetCommon().Type)
}
// xmlName returns the xml name of the element or attribute
func xmlName(f reflect.StructField) (name string, attr bool) {
tags := strings.Split(f.Tag.Get("xml"), ",")
for _, s := range tags {
attr = attr || s == "attr"
}
return tags[0], attr
}
func findField(v reflect.Value, key string) (reflect.Value, error) {
v = reflect.Indirect(v)
for i := iter(v); !i.done(); i.next() {
if n, _ := xmlName(i.field()); n == key {
return i.value(), nil
}
}
return reflect.Value{}, fmt.Errorf("cldr: no field %q in element %#v", key, v.Interface())
}
var xpathPart = regexp.MustCompile(`(\pL+)(?:\[@(\pL+)='([\w-]+)'\])?`)
func walkXPath(e Elem, path string) (res Elem, err error) {
for _, c := range strings.Split(path, "/") {
if c == ".." {
if e = e.enclosing(); e == nil {
panic("path ..")
return nil, fmt.Errorf(`cldr: ".." moves past root in path %q`, path)
}
continue
} else if c == "" {
continue
}
m := xpathPart.FindStringSubmatch(c)
if len(m) == 0 || len(m[0]) != len(c) {
return nil, fmt.Errorf("cldr: syntax error in path component %q", c)
}
v, err := findField(reflect.ValueOf(e), m[1])
if err != nil {
return nil, err
}
switch v.Kind() {
case reflect.Slice:
i := 0
if m[2] != "" || v.Len() > 1 {
if m[2] == "" {
m[2] = "type"
if m[3] = e.GetCommon().Default(); m[3] == "" {
return nil, fmt.Errorf("cldr: type selector or default value needed for element %s", m[1])
}
}
for ; i < v.Len(); i++ {
vi := v.Index(i)
key, err := findField(vi.Elem(), m[2])
if err != nil {
return nil, err
}
key = reflect.Indirect(key)
if key.Kind() == reflect.String && key.String() == m[3] {
break
}
}
}
if i == v.Len() || v.Index(i).IsNil() {
return nil, fmt.Errorf("no %s found with %s==%s", m[1], m[2], m[3])
}
e = v.Index(i).Interface().(Elem)
case reflect.Ptr:
if v.IsNil() {
return nil, fmt.Errorf("cldr: element %q not found within element %q", m[1], e.GetCommon().name)
}
var ok bool
if e, ok = v.Interface().(Elem); !ok {
return nil, fmt.Errorf("cldr: %q is not an XML element", m[1])
} else if m[2] != "" || m[3] != "" {
return nil, fmt.Errorf("cldr: no type selector allowed for element %s", m[1])
}
default:
return nil, fmt.Errorf("cldr: %q is not an XML element", m[1])
}
}
return e, nil
}
const absPrefix = "//ldml/"
func (cldr *CLDR) resolveAlias(e Elem, src, path string) (res Elem, err error) {
if src != "locale" {
if !strings.HasPrefix(path, absPrefix) {
return nil, fmt.Errorf("cldr: expected absolute path, found %q", path)
}
path = path[len(absPrefix):]
if e, err = cldr.resolve(src); err != nil {
return nil, err
}
}
return walkXPath(e, path)
}
func (cldr *CLDR) resolveAndMergeAlias(e Elem) error {
alias := e.GetCommon().Alias
if alias == nil {
return nil
}
a, err := cldr.resolveAlias(e, alias.Source, alias.Path)
if err != nil {
return fmt.Errorf("%v: error evaluating path %q: %v", getPath(e), alias.Path, err)
}
// Ensure alias node was already evaluated. TODO: avoid double evaluation.
err = cldr.resolveAndMergeAlias(a)
v := reflect.ValueOf(e).Elem()
for i := iter(reflect.ValueOf(a).Elem()); !i.done(); i.next() {
if vv := i.value(); vv.Kind() != reflect.Ptr || !vv.IsNil() {
if _, attr := xmlName(i.field()); !attr {
v.FieldByIndex(i.index).Set(vv)
}
}
}
return err
}
func (cldr *CLDR) aliasResolver() visitor {
return func(v reflect.Value) (err error) {
if e, ok := v.Addr().Interface().(Elem); ok {
err = cldr.resolveAndMergeAlias(e)
if err == nil && blocking[e.GetCommon().name] {
return stopDescent
}
}
return err
}
}
// elements within blocking elements do not inherit.
// Taken from CLDR's supplementalMetaData.xml.
var blocking = map[string]bool{
"identity": true,
"supplementalData": true,
"cldrTest": true,
"collation": true,
"transform": true,
}
// Distinguishing attributes affect inheritance; two elements with different
// distinguishing attributes are treated as different for purposes of inheritance,
// except when such attributes occur in the indicated elements.
// Taken from CLDR's supplementalMetaData.xml.
var distinguishing = map[string][]string{
"key": nil,
"request_id": nil,
"id": nil,
"registry": nil,
"alt": nil,
"iso4217": nil,
"iso3166": nil,
"mzone": nil,
"from": nil,
"to": nil,
"type": []string{
"abbreviationFallback",
"default",
"mapping",
"measurementSystem",
"preferenceOrdering",
},
"numberSystem": nil,
}
func in(set []string, s string) bool {
for _, v := range set {
if v == s {
return true
}
}
return false
}
// attrKey computes a key based on the distinguishable attributes of
// an element and its values.
func attrKey(v reflect.Value, exclude ...string) string {
parts := []string{}
ename := v.Interface().(Elem).GetCommon().name
v = v.Elem()
for i := iter(v); !i.done(); i.next() {
if name, attr := xmlName(i.field()); attr {
if except, ok := distinguishing[name]; ok && !in(exclude, name) && !in(except, ename) {
v := i.value()
if v.Kind() == reflect.Ptr {
v = v.Elem()
}
if v.IsValid() {
parts = append(parts, fmt.Sprintf("%s=%s", name, v.String()))
}
}
}
}
sort.Strings(parts)
return strings.Join(parts, ";")
}
// Key returns a key for e derived from all distinguishing attributes
// except those specified by exclude.
func Key(e Elem, exclude ...string) string {
return attrKey(reflect.ValueOf(e), exclude...)
}
// linkEnclosing sets the enclosing element as well as the name
// for all sub-elements of child, recursively.
func linkEnclosing(parent, child Elem) {
child.setEnclosing(parent)
v := reflect.ValueOf(child).Elem()
for i := iter(v); !i.done(); i.next() {
vf := i.value()
if vf.Kind() == reflect.Slice {
for j := 0; j < vf.Len(); j++ {
linkEnclosing(child, vf.Index(j).Interface().(Elem))
}
} else if vf.Kind() == reflect.Ptr && !vf.IsNil() && vf.Elem().Kind() == reflect.Struct {
linkEnclosing(child, vf.Interface().(Elem))
}
}
}
func setNames(e Elem, name string) {
e.setName(name)
v := reflect.ValueOf(e).Elem()
for i := iter(v); !i.done(); i.next() {
vf := i.value()
name, _ = xmlName(i.field())
if vf.Kind() == reflect.Slice {
for j := 0; j < vf.Len(); j++ {
setNames(vf.Index(j).Interface().(Elem), name)
}
} else if vf.Kind() == reflect.Ptr && !vf.IsNil() && vf.Elem().Kind() == reflect.Struct {
setNames(vf.Interface().(Elem), name)
}
}
}
// deepCopy copies elements of v recursively. All elements of v that may
// be modified by inheritance are explicitly copied.
func deepCopy(v reflect.Value) reflect.Value {
switch v.Kind() {
case reflect.Ptr:
if v.IsNil() || v.Elem().Kind() != reflect.Struct {
return v
}
nv := reflect.New(v.Elem().Type())
nv.Elem().Set(v.Elem())
deepCopyRec(nv.Elem(), v.Elem())
return nv
case reflect.Slice:
nv := reflect.MakeSlice(v.Type(), v.Len(), v.Len())
for i := 0; i < v.Len(); i++ {
deepCopyRec(nv.Index(i), v.Index(i))
}
return nv
}
panic("deepCopy: must be called with pointer or slice")
}
// deepCopyRec is only called by deepCopy.
func deepCopyRec(nv, v reflect.Value) {
if v.Kind() == reflect.Struct {
t := v.Type()
for i := 0; i < v.NumField(); i++ {
if name, attr := xmlName(t.Field(i)); name != "" && !attr {
deepCopyRec(nv.Field(i), v.Field(i))
}
}
} else {
nv.Set(deepCopy(v))
}
}
// newNode is used to insert a missing node during inheritance.
func (cldr *CLDR) newNode(v, enc reflect.Value) reflect.Value {
n := reflect.New(v.Type())
for i := iter(v); !i.done(); i.next() {
if name, attr := xmlName(i.field()); name == "" || attr {
n.Elem().FieldByIndex(i.index).Set(i.value())
}
}
n.Interface().(Elem).GetCommon().setEnclosing(enc.Addr().Interface().(Elem))
return n
}
// v, parent must be pointers to struct
func (cldr *CLDR) inheritFields(v, parent reflect.Value) (res reflect.Value, err error) {
t := v.Type()
nv := reflect.New(t)
nv.Elem().Set(v)
for i := iter(v); !i.done(); i.next() {
vf := i.value()
f := i.field()
name, attr := xmlName(f)
if name == "" || attr {
continue
}
pf := parent.FieldByIndex(i.index)
if blocking[name] {
if vf.IsNil() {
vf = pf
}
nv.Elem().FieldByIndex(i.index).Set(deepCopy(vf))
continue
}
switch f.Type.Kind() {
case reflect.Ptr:
if f.Type.Elem().Kind() == reflect.Struct {
if !vf.IsNil() {
if vf, err = cldr.inheritStructPtr(vf, pf); err != nil {
return reflect.Value{}, err
}
vf.Interface().(Elem).setEnclosing(nv.Interface().(Elem))
nv.Elem().FieldByIndex(i.index).Set(vf)
} else if !pf.IsNil() {
n := cldr.newNode(pf.Elem(), v)
if vf, err = cldr.inheritStructPtr(n, pf); err != nil {
return reflect.Value{}, err
}
vf.Interface().(Elem).setEnclosing(nv.Interface().(Elem))
nv.Elem().FieldByIndex(i.index).Set(vf)
}
}
case reflect.Slice:
vf, err := cldr.inheritSlice(nv.Elem(), vf, pf)
if err != nil {
return reflect.Zero(t), err
}
nv.Elem().FieldByIndex(i.index).Set(vf)
}
}
return nv, nil
}
func root(e Elem) *LDML {
for ; e.enclosing() != nil; e = e.enclosing() {
}
return e.(*LDML)
}
// inheritStructPtr first merges possible aliases in with v and then inherits
// any underspecified elements from parent.
func (cldr *CLDR) inheritStructPtr(v, parent reflect.Value) (r reflect.Value, err error) {
if !v.IsNil() {
e := v.Interface().(Elem).GetCommon()
alias := e.Alias
if alias == nil && !parent.IsNil() {
alias = parent.Interface().(Elem).GetCommon().Alias
}
if alias != nil {
a, err := cldr.resolveAlias(v.Interface().(Elem), alias.Source, alias.Path)
if a != nil {
if v, err = cldr.inheritFields(v.Elem(), reflect.ValueOf(a).Elem()); err != nil {
return reflect.Value{}, err
}
}
}
if !parent.IsNil() {
return cldr.inheritFields(v.Elem(), parent.Elem())
}
} else if parent.IsNil() {
panic("should not reach here")
}
return v, nil
}
// Must be slice of struct pointers.
func (cldr *CLDR) inheritSlice(enc, v, parent reflect.Value) (res reflect.Value, err error) {
t := v.Type()
index := make(map[string]reflect.Value)
if !v.IsNil() {
for i := 0; i < v.Len(); i++ {
vi := v.Index(i)
key := attrKey(vi)
index[key] = vi
}
}
if !parent.IsNil() {
for i := 0; i < parent.Len(); i++ {
vi := parent.Index(i)
key := attrKey(vi)
if w, ok := index[key]; ok {
index[key], err = cldr.inheritStructPtr(w, vi)
} else {
n := cldr.newNode(vi.Elem(), enc)
index[key], err = cldr.inheritStructPtr(n, vi)
}
index[key].Interface().(Elem).setEnclosing(enc.Addr().Interface().(Elem))
if err != nil {
return v, err
}
}
}
keys := make([]string, 0, len(index))
for k, _ := range index {
keys = append(keys, k)
}
sort.Strings(keys)
sl := reflect.MakeSlice(t, len(index), len(index))
for i, k := range keys {
sl.Index(i).Set(index[k])
}
return sl, nil
}
func parentLocale(loc string) string {
parts := strings.Split(loc, "_")
if len(parts) == 1 {
return "root"
}
parts = parts[:len(parts)-1]
key := strings.Join(parts, "_")
return key
}
func (cldr *CLDR) resolve(loc string) (res *LDML, err error) {
if r := cldr.resolved[loc]; r != nil {
return r, nil
}
x := cldr.RawLDML(loc)
if x == nil {
return nil, fmt.Errorf("cldr: unknown locale %q", loc)
}
var v reflect.Value
if loc == "root" {
x = deepCopy(reflect.ValueOf(x)).Interface().(*LDML)
linkEnclosing(nil, x)
err = cldr.aliasResolver().visit(x)
} else {
key := parentLocale(loc)
var parent *LDML
for ; cldr.locale[key] == nil; key = parentLocale(key) {
}
if parent, err = cldr.resolve(key); err != nil {
return nil, err
}
v, err = cldr.inheritFields(reflect.ValueOf(x).Elem(), reflect.ValueOf(parent).Elem())
x = v.Interface().(*LDML)
linkEnclosing(nil, x)
}
if err != nil {
return nil, err
}
cldr.resolved[loc] = x
return x, err
}
// finalize finalizes the initialization of the raw LDML structs. It also
// removed unwanted fields, as specified by filter, so that they will not
// be unnecessarily evaluated.
func (cldr *CLDR) finalize(filter []string) {
for _, x := range cldr.locale {
if filter != nil {
v := reflect.ValueOf(x).Elem()
t := v.Type()
for i := 0; i < v.NumField(); i++ {
f := t.Field(i)
name, _ := xmlName(f)
if name != "" && name != "identity" && !in(filter, name) {
v.Field(i).Set(reflect.Zero(f.Type))
}
}
}
linkEnclosing(nil, x) // for resolving aliases and paths
setNames(x, "ldml")
}
}

144
vendor/golang.org/x/text/unicode/cldr/slice.go generated vendored Normal file
View file

@ -0,0 +1,144 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
import (
"fmt"
"reflect"
"sort"
)
// Slice provides utilities for modifying slices of elements.
// It can be wrapped around any slice of which the element type implements
// interface Elem.
type Slice struct {
ptr reflect.Value
typ reflect.Type
}
// Value returns the reflect.Value of the underlying slice.
func (s *Slice) Value() reflect.Value {
return s.ptr.Elem()
}
// MakeSlice wraps a pointer to a slice of Elems.
// It replaces the array pointed to by the slice so that subsequent modifications
// do not alter the data in a CLDR type.
// It panics if an incorrect type is passed.
func MakeSlice(slicePtr interface{}) Slice {
ptr := reflect.ValueOf(slicePtr)
if ptr.Kind() != reflect.Ptr {
panic(fmt.Sprintf("MakeSlice: argument must be pointer to slice, found %v", ptr.Type()))
}
sl := ptr.Elem()
if sl.Kind() != reflect.Slice {
panic(fmt.Sprintf("MakeSlice: argument must point to a slice, found %v", sl.Type()))
}
intf := reflect.TypeOf((*Elem)(nil)).Elem()
if !sl.Type().Elem().Implements(intf) {
panic(fmt.Sprintf("MakeSlice: element type of slice (%v) does not implement Elem", sl.Type().Elem()))
}
nsl := reflect.MakeSlice(sl.Type(), sl.Len(), sl.Len())
reflect.Copy(nsl, sl)
sl.Set(nsl)
return Slice{
ptr: ptr,
typ: sl.Type().Elem().Elem(),
}
}
func (s Slice) indexForAttr(a string) []int {
for i := iter(reflect.Zero(s.typ)); !i.done(); i.next() {
if n, _ := xmlName(i.field()); n == a {
return i.index
}
}
panic(fmt.Sprintf("MakeSlice: no attribute %q for type %v", a, s.typ))
}
// Filter filters s to only include elements for which fn returns true.
func (s Slice) Filter(fn func(e Elem) bool) {
k := 0
sl := s.Value()
for i := 0; i < sl.Len(); i++ {
vi := sl.Index(i)
if fn(vi.Interface().(Elem)) {
sl.Index(k).Set(vi)
k++
}
}
sl.Set(sl.Slice(0, k))
}
// Group finds elements in s for which fn returns the same value and groups
// them in a new Slice.
func (s Slice) Group(fn func(e Elem) string) []Slice {
m := make(map[string][]reflect.Value)
sl := s.Value()
for i := 0; i < sl.Len(); i++ {
vi := sl.Index(i)
key := fn(vi.Interface().(Elem))
m[key] = append(m[key], vi)
}
keys := []string{}
for k, _ := range m {
keys = append(keys, k)
}
sort.Strings(keys)
res := []Slice{}
for _, k := range keys {
nsl := reflect.New(sl.Type())
nsl.Elem().Set(reflect.Append(nsl.Elem(), m[k]...))
res = append(res, MakeSlice(nsl.Interface()))
}
return res
}
// SelectAnyOf filters s to contain only elements for which attr matches
// any of the values.
func (s Slice) SelectAnyOf(attr string, values ...string) {
index := s.indexForAttr(attr)
s.Filter(func(e Elem) bool {
vf := reflect.ValueOf(e).Elem().FieldByIndex(index)
return in(values, vf.String())
})
}
// SelectOnePerGroup filters s to include at most one element e per group of
// elements matching Key(attr), where e has an attribute a that matches any
// the values in v.
// If more than one element in a group matches a value in v preference
// is given to the element that matches the first value in v.
func (s Slice) SelectOnePerGroup(a string, v []string) {
index := s.indexForAttr(a)
grouped := s.Group(func(e Elem) string { return Key(e, a) })
sl := s.Value()
sl.Set(sl.Slice(0, 0))
for _, g := range grouped {
e := reflect.Value{}
found := len(v)
gsl := g.Value()
for i := 0; i < gsl.Len(); i++ {
vi := gsl.Index(i).Elem().FieldByIndex(index)
j := 0
for ; j < len(v) && v[j] != vi.String(); j++ {
}
if j < found {
found = j
e = gsl.Index(i)
}
}
if found < len(v) {
sl.Set(reflect.Append(sl, e))
}
}
}
// SelectDraft drops all elements from the list with a draft level smaller than d
// and selects the highest draft level of the remaining.
// This method assumes that the input CLDR is canonicalized.
func (s Slice) SelectDraft(d Draft) {
s.SelectOnePerGroup("draft", drafts[len(drafts)-2-int(d):])
}

1494
vendor/golang.org/x/text/unicode/cldr/xml.go generated vendored Normal file

File diff suppressed because it is too large Load diff

512
vendor/golang.org/x/text/unicode/norm/composition.go generated vendored Normal file
View file

@ -0,0 +1,512 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "unicode/utf8"
const (
maxNonStarters = 30
// The maximum number of characters needed for a buffer is
// maxNonStarters + 1 for the starter + 1 for the GCJ
maxBufferSize = maxNonStarters + 2
maxNFCExpansion = 3 // NFC(0x1D160)
maxNFKCExpansion = 18 // NFKC(0xFDFA)
maxByteBufferSize = utf8.UTFMax * maxBufferSize // 128
)
// ssState is used for reporting the segment state after inserting a rune.
// It is returned by streamSafe.next.
type ssState int
const (
// Indicates a rune was successfully added to the segment.
ssSuccess ssState = iota
// Indicates a rune starts a new segment and should not be added.
ssStarter
// Indicates a rune caused a segment overflow and a CGJ should be inserted.
ssOverflow
)
// streamSafe implements the policy of when a CGJ should be inserted.
type streamSafe uint8
// first inserts the first rune of a segment. It is a faster version of next if
// it is known p represents the first rune in a segment.
func (ss *streamSafe) first(p Properties) {
*ss = streamSafe(p.nTrailingNonStarters())
}
// insert returns a ssState value to indicate whether a rune represented by p
// can be inserted.
func (ss *streamSafe) next(p Properties) ssState {
if *ss > maxNonStarters {
panic("streamSafe was not reset")
}
n := p.nLeadingNonStarters()
if *ss += streamSafe(n); *ss > maxNonStarters {
*ss = 0
return ssOverflow
}
// The Stream-Safe Text Processing prescribes that the counting can stop
// as soon as a starter is encountered. However, there are some starters,
// like Jamo V and T, that can combine with other runes, leaving their
// successive non-starters appended to the previous, possibly causing an
// overflow. We will therefore consider any rune with a non-zero nLead to
// be a non-starter. Note that it always hold that if nLead > 0 then
// nLead == nTrail.
if n == 0 {
*ss = streamSafe(p.nTrailingNonStarters())
return ssStarter
}
return ssSuccess
}
// backwards is used for checking for overflow and segment starts
// when traversing a string backwards. Users do not need to call first
// for the first rune. The state of the streamSafe retains the count of
// the non-starters loaded.
func (ss *streamSafe) backwards(p Properties) ssState {
if *ss > maxNonStarters {
panic("streamSafe was not reset")
}
c := *ss + streamSafe(p.nTrailingNonStarters())
if c > maxNonStarters {
return ssOverflow
}
*ss = c
if p.nLeadingNonStarters() == 0 {
return ssStarter
}
return ssSuccess
}
func (ss streamSafe) isMax() bool {
return ss == maxNonStarters
}
// GraphemeJoiner is inserted after maxNonStarters non-starter runes.
const GraphemeJoiner = "\u034F"
// reorderBuffer is used to normalize a single segment. Characters inserted with
// insert are decomposed and reordered based on CCC. The compose method can
// be used to recombine characters. Note that the byte buffer does not hold
// the UTF-8 characters in order. Only the rune array is maintained in sorted
// order. flush writes the resulting segment to a byte array.
type reorderBuffer struct {
rune [maxBufferSize]Properties // Per character info.
byte [maxByteBufferSize]byte // UTF-8 buffer. Referenced by runeInfo.pos.
nbyte uint8 // Number or bytes.
ss streamSafe // For limiting length of non-starter sequence.
nrune int // Number of runeInfos.
f formInfo
src input
nsrc int
tmpBytes input
out []byte
flushF func(*reorderBuffer) bool
}
func (rb *reorderBuffer) init(f Form, src []byte) {
rb.f = *formTable[f]
rb.src.setBytes(src)
rb.nsrc = len(src)
rb.ss = 0
}
func (rb *reorderBuffer) initString(f Form, src string) {
rb.f = *formTable[f]
rb.src.setString(src)
rb.nsrc = len(src)
rb.ss = 0
}
func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) {
rb.out = out
rb.flushF = f
}
// reset discards all characters from the buffer.
func (rb *reorderBuffer) reset() {
rb.nrune = 0
rb.nbyte = 0
}
func (rb *reorderBuffer) doFlush() bool {
if rb.f.composing {
rb.compose()
}
res := rb.flushF(rb)
rb.reset()
return res
}
// appendFlush appends the normalized segment to rb.out.
func appendFlush(rb *reorderBuffer) bool {
for i := 0; i < rb.nrune; i++ {
start := rb.rune[i].pos
end := start + rb.rune[i].size
rb.out = append(rb.out, rb.byte[start:end]...)
}
return true
}
// flush appends the normalized segment to out and resets rb.
func (rb *reorderBuffer) flush(out []byte) []byte {
for i := 0; i < rb.nrune; i++ {
start := rb.rune[i].pos
end := start + rb.rune[i].size
out = append(out, rb.byte[start:end]...)
}
rb.reset()
return out
}
// flushCopy copies the normalized segment to buf and resets rb.
// It returns the number of bytes written to buf.
func (rb *reorderBuffer) flushCopy(buf []byte) int {
p := 0
for i := 0; i < rb.nrune; i++ {
runep := rb.rune[i]
p += copy(buf[p:], rb.byte[runep.pos:runep.pos+runep.size])
}
rb.reset()
return p
}
// insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class.
// It returns false if the buffer is not large enough to hold the rune.
// It is used internally by insert and insertString only.
func (rb *reorderBuffer) insertOrdered(info Properties) {
n := rb.nrune
b := rb.rune[:]
cc := info.ccc
if cc > 0 {
// Find insertion position + move elements to make room.
for ; n > 0; n-- {
if b[n-1].ccc <= cc {
break
}
b[n] = b[n-1]
}
}
rb.nrune += 1
pos := uint8(rb.nbyte)
rb.nbyte += utf8.UTFMax
info.pos = pos
b[n] = info
}
// insertErr is an error code returned by insert. Using this type instead
// of error improves performance up to 20% for many of the benchmarks.
type insertErr int
const (
iSuccess insertErr = -iota
iShortDst
iShortSrc
)
// insertFlush inserts the given rune in the buffer ordered by CCC.
// If a decomposition with multiple segments are encountered, they leading
// ones are flushed.
// It returns a non-zero error code if the rune was not inserted.
func (rb *reorderBuffer) insertFlush(src input, i int, info Properties) insertErr {
if rune := src.hangul(i); rune != 0 {
rb.decomposeHangul(rune)
return iSuccess
}
if info.hasDecomposition() {
return rb.insertDecomposed(info.Decomposition())
}
rb.insertSingle(src, i, info)
return iSuccess
}
// insertUnsafe inserts the given rune in the buffer ordered by CCC.
// It is assumed there is sufficient space to hold the runes. It is the
// responsibility of the caller to ensure this. This can be done by checking
// the state returned by the streamSafe type.
func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) {
if rune := src.hangul(i); rune != 0 {
rb.decomposeHangul(rune)
}
if info.hasDecomposition() {
// TODO: inline.
rb.insertDecomposed(info.Decomposition())
} else {
rb.insertSingle(src, i, info)
}
}
// insertDecomposed inserts an entry in to the reorderBuffer for each rune
// in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes.
// It flushes the buffer on each new segment start.
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr {
rb.tmpBytes.setBytes(dcomp)
// As the streamSafe accounting already handles the counting for modifiers,
// we don't have to call next. However, we do need to keep the accounting
// intact when flushing the buffer.
for i := 0; i < len(dcomp); {
info := rb.f.info(rb.tmpBytes, i)
if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() {
return iShortDst
}
i += copy(rb.byte[rb.nbyte:], dcomp[i:i+int(info.size)])
rb.insertOrdered(info)
}
return iSuccess
}
// insertSingle inserts an entry in the reorderBuffer for the rune at
// position i. info is the runeInfo for the rune at position i.
func (rb *reorderBuffer) insertSingle(src input, i int, info Properties) {
src.copySlice(rb.byte[rb.nbyte:], i, i+int(info.size))
rb.insertOrdered(info)
}
// insertCGJ inserts a Combining Grapheme Joiner (0x034f) into rb.
func (rb *reorderBuffer) insertCGJ() {
rb.insertSingle(input{str: GraphemeJoiner}, 0, Properties{size: uint8(len(GraphemeJoiner))})
}
// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
func (rb *reorderBuffer) appendRune(r rune) {
bn := rb.nbyte
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.nbyte += utf8.UTFMax
rb.rune[rb.nrune] = Properties{pos: bn, size: uint8(sz)}
rb.nrune++
}
// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
func (rb *reorderBuffer) assignRune(pos int, r rune) {
bn := rb.rune[pos].pos
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.rune[pos] = Properties{pos: bn, size: uint8(sz)}
}
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
func (rb *reorderBuffer) runeAt(n int) rune {
inf := rb.rune[n]
r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
return r
}
// bytesAt returns the UTF-8 encoding of the rune at position n.
// It is used for Hangul and recomposition.
func (rb *reorderBuffer) bytesAt(n int) []byte {
inf := rb.rune[n]
return rb.byte[inf.pos : int(inf.pos)+int(inf.size)]
}
// For Hangul we combine algorithmically, instead of using tables.
const (
hangulBase = 0xAC00 // UTF-8(hangulBase) -> EA B0 80
hangulBase0 = 0xEA
hangulBase1 = 0xB0
hangulBase2 = 0x80
hangulEnd = hangulBase + jamoLVTCount // UTF-8(0xD7A4) -> ED 9E A4
hangulEnd0 = 0xED
hangulEnd1 = 0x9E
hangulEnd2 = 0xA4
jamoLBase = 0x1100 // UTF-8(jamoLBase) -> E1 84 00
jamoLBase0 = 0xE1
jamoLBase1 = 0x84
jamoLEnd = 0x1113
jamoVBase = 0x1161
jamoVEnd = 0x1176
jamoTBase = 0x11A7
jamoTEnd = 0x11C3
jamoTCount = 28
jamoVCount = 21
jamoVTCount = 21 * 28
jamoLVTCount = 19 * 21 * 28
)
const hangulUTF8Size = 3
func isHangul(b []byte) bool {
if len(b) < hangulUTF8Size {
return false
}
b0 := b[0]
if b0 < hangulBase0 {
return false
}
b1 := b[1]
switch {
case b0 == hangulBase0:
return b1 >= hangulBase1
case b0 < hangulEnd0:
return true
case b0 > hangulEnd0:
return false
case b1 < hangulEnd1:
return true
}
return b1 == hangulEnd1 && b[2] < hangulEnd2
}
func isHangulString(b string) bool {
if len(b) < hangulUTF8Size {
return false
}
b0 := b[0]
if b0 < hangulBase0 {
return false
}
b1 := b[1]
switch {
case b0 == hangulBase0:
return b1 >= hangulBase1
case b0 < hangulEnd0:
return true
case b0 > hangulEnd0:
return false
case b1 < hangulEnd1:
return true
}
return b1 == hangulEnd1 && b[2] < hangulEnd2
}
// Caller must ensure len(b) >= 2.
func isJamoVT(b []byte) bool {
// True if (rune & 0xff00) == jamoLBase
return b[0] == jamoLBase0 && (b[1]&0xFC) == jamoLBase1
}
func isHangulWithoutJamoT(b []byte) bool {
c, _ := utf8.DecodeRune(b)
c -= hangulBase
return c < jamoLVTCount && c%jamoTCount == 0
}
// decomposeHangul writes the decomposed Hangul to buf and returns the number
// of bytes written. len(buf) should be at least 9.
func decomposeHangul(buf []byte, r rune) int {
const JamoUTF8Len = 3
r -= hangulBase
x := r % jamoTCount
r /= jamoTCount
utf8.EncodeRune(buf, jamoLBase+r/jamoVCount)
utf8.EncodeRune(buf[JamoUTF8Len:], jamoVBase+r%jamoVCount)
if x != 0 {
utf8.EncodeRune(buf[2*JamoUTF8Len:], jamoTBase+x)
return 3 * JamoUTF8Len
}
return 2 * JamoUTF8Len
}
// decomposeHangul algorithmically decomposes a Hangul rune into
// its Jamo components.
// See https://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
func (rb *reorderBuffer) decomposeHangul(r rune) {
r -= hangulBase
x := r % jamoTCount
r /= jamoTCount
rb.appendRune(jamoLBase + r/jamoVCount)
rb.appendRune(jamoVBase + r%jamoVCount)
if x != 0 {
rb.appendRune(jamoTBase + x)
}
}
// combineHangul algorithmically combines Jamo character components into Hangul.
// See https://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
func (rb *reorderBuffer) combineHangul(s, i, k int) {
b := rb.rune[:]
bn := rb.nrune
for ; i < bn; i++ {
cccB := b[k-1].ccc
cccC := b[i].ccc
if cccB == 0 {
s = k - 1
}
if s != k-1 && cccB >= cccC {
// b[i] is blocked by greater-equal cccX below it
b[k] = b[i]
k++
} else {
l := rb.runeAt(s) // also used to compare to hangulBase
v := rb.runeAt(i) // also used to compare to jamoT
switch {
case jamoLBase <= l && l < jamoLEnd &&
jamoVBase <= v && v < jamoVEnd:
// 11xx plus 116x to LV
rb.assignRune(s, hangulBase+
(l-jamoLBase)*jamoVTCount+(v-jamoVBase)*jamoTCount)
case hangulBase <= l && l < hangulEnd &&
jamoTBase < v && v < jamoTEnd &&
((l-hangulBase)%jamoTCount) == 0:
// ACxx plus 11Ax to LVT
rb.assignRune(s, l+v-jamoTBase)
default:
b[k] = b[i]
k++
}
}
}
rb.nrune = k
}
// compose recombines the runes in the buffer.
// It should only be used to recompose a single segment, as it will not
// handle alternations between Hangul and non-Hangul characters correctly.
func (rb *reorderBuffer) compose() {
// Lazily load the map used by the combine func below, but do
// it outside of the loop.
recompMapOnce.Do(buildRecompMap)
// UAX #15, section X5 , including Corrigendum #5
// "In any character sequence beginning with starter S, a character C is
// blocked from S if and only if there is some character B between S
// and C, and either B is a starter or it has the same or higher
// combining class as C."
bn := rb.nrune
if bn == 0 {
return
}
k := 1
b := rb.rune[:]
for s, i := 0, 1; i < bn; i++ {
if isJamoVT(rb.bytesAt(i)) {
// Redo from start in Hangul mode. Necessary to support
// U+320E..U+321E in NFKC mode.
rb.combineHangul(s, i, k)
return
}
ii := b[i]
// We can only use combineForward as a filter if we later
// get the info for the combined character. This is more
// expensive than using the filter. Using combinesBackward()
// is safe.
if ii.combinesBackward() {
cccB := b[k-1].ccc
cccC := ii.ccc
blocked := false // b[i] blocked by starter or greater or equal CCC?
if cccB == 0 {
s = k - 1
} else {
blocked = s != k-1 && cccB >= cccC
}
if !blocked {
combined := combine(rb.runeAt(s), rb.runeAt(i))
if combined != 0 {
rb.assignRune(s, combined)
continue
}
}
}
b[k] = b[i]
k++
}
rb.nrune = k
}

278
vendor/golang.org/x/text/unicode/norm/forminfo.go generated vendored Normal file
View file

@ -0,0 +1,278 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "encoding/binary"
// This file contains Form-specific logic and wrappers for data in tables.go.
// Rune info is stored in a separate trie per composing form. A composing form
// and its corresponding decomposing form share the same trie. Each trie maps
// a rune to a uint16. The values take two forms. For v >= 0x8000:
// bits
// 15: 1 (inverse of NFD_QC bit of qcInfo)
// 13..7: qcInfo (see below). isYesD is always true (no decompostion).
// 6..0: ccc (compressed CCC value).
// For v < 0x8000, the respective rune has a decomposition and v is an index
// into a byte array of UTF-8 decomposition sequences and additional info and
// has the form:
// <header> <decomp_byte>* [<tccc> [<lccc>]]
// The header contains the number of bytes in the decomposition (excluding this
// length byte). The two most significant bits of this length byte correspond
// to bit 5 and 4 of qcInfo (see below). The byte sequence itself starts at v+1.
// The byte sequence is followed by a trailing and leading CCC if the values
// for these are not zero. The value of v determines which ccc are appended
// to the sequences. For v < firstCCC, there are none, for v >= firstCCC,
// the sequence is followed by a trailing ccc, and for v >= firstLeadingCC
// there is an additional leading ccc. The value of tccc itself is the
// trailing CCC shifted left 2 bits. The two least-significant bits of tccc
// are the number of trailing non-starters.
const (
qcInfoMask = 0x3F // to clear all but the relevant bits in a qcInfo
headerLenMask = 0x3F // extract the length value from the header byte
headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte
)
// Properties provides access to normalization properties of a rune.
type Properties struct {
pos uint8 // start position in reorderBuffer; used in composition.go
size uint8 // length of UTF-8 encoding of this rune
ccc uint8 // leading canonical combining class (ccc if not decomposition)
tccc uint8 // trailing canonical combining class (ccc if not decomposition)
nLead uint8 // number of leading non-starters.
flags qcInfo // quick check flags
index uint16
}
// functions dispatchable per form
type lookupFunc func(b input, i int) Properties
// formInfo holds Form-specific functions and tables.
type formInfo struct {
form Form
composing, compatibility bool // form type
info lookupFunc
nextMain iterFunc
}
var formTable = []*formInfo{{
form: NFC,
composing: true,
compatibility: false,
info: lookupInfoNFC,
nextMain: nextComposed,
}, {
form: NFD,
composing: false,
compatibility: false,
info: lookupInfoNFC,
nextMain: nextDecomposed,
}, {
form: NFKC,
composing: true,
compatibility: true,
info: lookupInfoNFKC,
nextMain: nextComposed,
}, {
form: NFKD,
composing: false,
compatibility: true,
info: lookupInfoNFKC,
nextMain: nextDecomposed,
}}
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
// unexpected behavior for the user. For example, in NFD, there is a boundary
// after 'a'. However, 'a' might combine with modifiers, so from the application's
// perspective it is not a good boundary. We will therefore always use the
// boundaries for the combining variants.
// BoundaryBefore returns true if this rune starts a new segment and
// cannot combine with any rune on the left.
func (p Properties) BoundaryBefore() bool {
if p.ccc == 0 && !p.combinesBackward() {
return true
}
// We assume that the CCC of the first character in a decomposition
// is always non-zero if different from info.ccc and that we can return
// false at this point. This is verified by maketables.
return false
}
// BoundaryAfter returns true if runes cannot combine with or otherwise
// interact with this or previous runes.
func (p Properties) BoundaryAfter() bool {
// TODO: loosen these conditions.
return p.isInert()
}
// We pack quick check data in 4 bits:
// 5: Combines forward (0 == false, 1 == true)
// 4..3: NFC_QC Yes(00), No (10), or Maybe (11)
// 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition.
// 1..0: Number of trailing non-starters.
//
// When all 4 bits are zero, the character is inert, meaning it is never
// influenced by normalization.
type qcInfo uint8
func (p Properties) isYesC() bool { return p.flags&0x10 == 0 }
func (p Properties) isYesD() bool { return p.flags&0x4 == 0 }
func (p Properties) combinesForward() bool { return p.flags&0x20 != 0 }
func (p Properties) combinesBackward() bool { return p.flags&0x8 != 0 } // == isMaybe
func (p Properties) hasDecomposition() bool { return p.flags&0x4 != 0 } // == isNoD
func (p Properties) isInert() bool {
return p.flags&qcInfoMask == 0 && p.ccc == 0
}
func (p Properties) multiSegment() bool {
return p.index >= firstMulti && p.index < endMulti
}
func (p Properties) nLeadingNonStarters() uint8 {
return p.nLead
}
func (p Properties) nTrailingNonStarters() uint8 {
return uint8(p.flags & 0x03)
}
// Decomposition returns the decomposition for the underlying rune
// or nil if there is none.
func (p Properties) Decomposition() []byte {
// TODO: create the decomposition for Hangul?
if p.index == 0 {
return nil
}
i := p.index
n := decomps[i] & headerLenMask
i++
return decomps[i : i+uint16(n)]
}
// Size returns the length of UTF-8 encoding of the rune.
func (p Properties) Size() int {
return int(p.size)
}
// CCC returns the canonical combining class of the underlying rune.
func (p Properties) CCC() uint8 {
if p.index >= firstCCCZeroExcept {
return 0
}
return ccc[p.ccc]
}
// LeadCCC returns the CCC of the first rune in the decomposition.
// If there is no decomposition, LeadCCC equals CCC.
func (p Properties) LeadCCC() uint8 {
return ccc[p.ccc]
}
// TrailCCC returns the CCC of the last rune in the decomposition.
// If there is no decomposition, TrailCCC equals CCC.
func (p Properties) TrailCCC() uint8 {
return ccc[p.tccc]
}
func buildRecompMap() {
recompMap = make(map[uint32]rune, len(recompMapPacked)/8)
var buf [8]byte
for i := 0; i < len(recompMapPacked); i += 8 {
copy(buf[:], recompMapPacked[i:i+8])
key := binary.BigEndian.Uint32(buf[:4])
val := binary.BigEndian.Uint32(buf[4:])
recompMap[key] = rune(val)
}
}
// Recomposition
// We use 32-bit keys instead of 64-bit for the two codepoint keys.
// This clips off the bits of three entries, but we know this will not
// result in a collision. In the unlikely event that changes to
// UnicodeData.txt introduce collisions, the compiler will catch it.
// Note that the recomposition map for NFC and NFKC are identical.
// combine returns the combined rune or 0 if it doesn't exist.
//
// The caller is responsible for calling
// recompMapOnce.Do(buildRecompMap) sometime before this is called.
func combine(a, b rune) rune {
key := uint32(uint16(a))<<16 + uint32(uint16(b))
if recompMap == nil {
panic("caller error") // see func comment
}
return recompMap[key]
}
func lookupInfoNFC(b input, i int) Properties {
v, sz := b.charinfoNFC(i)
return compInfo(v, sz)
}
func lookupInfoNFKC(b input, i int) Properties {
v, sz := b.charinfoNFKC(i)
return compInfo(v, sz)
}
// Properties returns properties for the first rune in s.
func (f Form) Properties(s []byte) Properties {
if f == NFC || f == NFD {
return compInfo(nfcData.lookup(s))
}
return compInfo(nfkcData.lookup(s))
}
// PropertiesString returns properties for the first rune in s.
func (f Form) PropertiesString(s string) Properties {
if f == NFC || f == NFD {
return compInfo(nfcData.lookupString(s))
}
return compInfo(nfkcData.lookupString(s))
}
// compInfo converts the information contained in v and sz
// to a Properties. See the comment at the top of the file
// for more information on the format.
func compInfo(v uint16, sz int) Properties {
if v == 0 {
return Properties{size: uint8(sz)}
} else if v >= 0x8000 {
p := Properties{
size: uint8(sz),
ccc: uint8(v),
tccc: uint8(v),
flags: qcInfo(v >> 8),
}
if p.ccc > 0 || p.combinesBackward() {
p.nLead = uint8(p.flags & 0x3)
}
return p
}
// has decomposition
h := decomps[v]
f := (qcInfo(h&headerFlagsMask) >> 2) | 0x4
p := Properties{size: uint8(sz), flags: f, index: v}
if v >= firstCCC {
v += uint16(h&headerLenMask) + 1
c := decomps[v]
p.tccc = c >> 2
p.flags |= qcInfo(c & 0x3)
if v >= firstLeadingCCC {
p.nLead = c & 0x3
if v >= firstStarterWithNLead {
// We were tricked. Remove the decomposition.
p.flags &= 0x03
p.index = 0
return p
}
p.ccc = decomps[v+1]
}
}
return p
}

109
vendor/golang.org/x/text/unicode/norm/input.go generated vendored Normal file
View file

@ -0,0 +1,109 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "unicode/utf8"
type input struct {
str string
bytes []byte
}
func inputBytes(str []byte) input {
return input{bytes: str}
}
func inputString(str string) input {
return input{str: str}
}
func (in *input) setBytes(str []byte) {
in.str = ""
in.bytes = str
}
func (in *input) setString(str string) {
in.str = str
in.bytes = nil
}
func (in *input) _byte(p int) byte {
if in.bytes == nil {
return in.str[p]
}
return in.bytes[p]
}
func (in *input) skipASCII(p, max int) int {
if in.bytes == nil {
for ; p < max && in.str[p] < utf8.RuneSelf; p++ {
}
} else {
for ; p < max && in.bytes[p] < utf8.RuneSelf; p++ {
}
}
return p
}
func (in *input) skipContinuationBytes(p int) int {
if in.bytes == nil {
for ; p < len(in.str) && !utf8.RuneStart(in.str[p]); p++ {
}
} else {
for ; p < len(in.bytes) && !utf8.RuneStart(in.bytes[p]); p++ {
}
}
return p
}
func (in *input) appendSlice(buf []byte, b, e int) []byte {
if in.bytes != nil {
return append(buf, in.bytes[b:e]...)
}
for i := b; i < e; i++ {
buf = append(buf, in.str[i])
}
return buf
}
func (in *input) copySlice(buf []byte, b, e int) int {
if in.bytes == nil {
return copy(buf, in.str[b:e])
}
return copy(buf, in.bytes[b:e])
}
func (in *input) charinfoNFC(p int) (uint16, int) {
if in.bytes == nil {
return nfcData.lookupString(in.str[p:])
}
return nfcData.lookup(in.bytes[p:])
}
func (in *input) charinfoNFKC(p int) (uint16, int) {
if in.bytes == nil {
return nfkcData.lookupString(in.str[p:])
}
return nfkcData.lookup(in.bytes[p:])
}
func (in *input) hangul(p int) (r rune) {
var size int
if in.bytes == nil {
if !isHangulString(in.str[p:]) {
return 0
}
r, size = utf8.DecodeRuneInString(in.str[p:])
} else {
if !isHangul(in.bytes[p:]) {
return 0
}
r, size = utf8.DecodeRune(in.bytes[p:])
}
if size != hangulUTF8Size {
return 0
}
return r
}

458
vendor/golang.org/x/text/unicode/norm/iter.go generated vendored Normal file
View file

@ -0,0 +1,458 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"fmt"
"unicode/utf8"
)
// MaxSegmentSize is the maximum size of a byte buffer needed to consider any
// sequence of starter and non-starter runes for the purpose of normalization.
const MaxSegmentSize = maxByteBufferSize
// An Iter iterates over a string or byte slice, while normalizing it
// to a given Form.
type Iter struct {
rb reorderBuffer
buf [maxByteBufferSize]byte
info Properties // first character saved from previous iteration
next iterFunc // implementation of next depends on form
asciiF iterFunc
p int // current position in input source
multiSeg []byte // remainder of multi-segment decomposition
}
type iterFunc func(*Iter) []byte
// Init initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) Init(f Form, src []byte) {
i.p = 0
if len(src) == 0 {
i.setDone()
i.rb.nsrc = 0
return
}
i.multiSeg = nil
i.rb.init(f, src)
i.next = i.rb.f.nextMain
i.asciiF = nextASCIIBytes
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
}
// InitString initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) InitString(f Form, src string) {
i.p = 0
if len(src) == 0 {
i.setDone()
i.rb.nsrc = 0
return
}
i.multiSeg = nil
i.rb.initString(f, src)
i.next = i.rb.f.nextMain
i.asciiF = nextASCIIString
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
}
// Seek sets the segment to be returned by the next call to Next to start
// at position p. It is the responsibility of the caller to set p to the
// start of a segment.
func (i *Iter) Seek(offset int64, whence int) (int64, error) {
var abs int64
switch whence {
case 0:
abs = offset
case 1:
abs = int64(i.p) + offset
case 2:
abs = int64(i.rb.nsrc) + offset
default:
return 0, fmt.Errorf("norm: invalid whence")
}
if abs < 0 {
return 0, fmt.Errorf("norm: negative position")
}
if int(abs) >= i.rb.nsrc {
i.setDone()
return int64(i.p), nil
}
i.p = int(abs)
i.multiSeg = nil
i.next = i.rb.f.nextMain
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
return abs, nil
}
// returnSlice returns a slice of the underlying input type as a byte slice.
// If the underlying is of type []byte, it will simply return a slice.
// If the underlying is of type string, it will copy the slice to the buffer
// and return that.
func (i *Iter) returnSlice(a, b int) []byte {
if i.rb.src.bytes == nil {
return i.buf[:copy(i.buf[:], i.rb.src.str[a:b])]
}
return i.rb.src.bytes[a:b]
}
// Pos returns the byte position at which the next call to Next will commence processing.
func (i *Iter) Pos() int {
return i.p
}
func (i *Iter) setDone() {
i.next = nextDone
i.p = i.rb.nsrc
}
// Done returns true if there is no more input to process.
func (i *Iter) Done() bool {
return i.p >= i.rb.nsrc
}
// Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input.
// For any input a and b for which f(a) == f(b), subsequent calls
// to Next will return the same segments.
// Modifying runes are grouped together with the preceding starter, if such a starter exists.
// Although not guaranteed, n will typically be the smallest possible n.
func (i *Iter) Next() []byte {
return i.next(i)
}
func nextASCIIBytes(i *Iter) []byte {
p := i.p + 1
if p >= i.rb.nsrc {
p0 := i.p
i.setDone()
return i.rb.src.bytes[p0:p]
}
if i.rb.src.bytes[p] < utf8.RuneSelf {
p0 := i.p
i.p = p
return i.rb.src.bytes[p0:p]
}
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
func nextASCIIString(i *Iter) []byte {
p := i.p + 1
if p >= i.rb.nsrc {
i.buf[0] = i.rb.src.str[i.p]
i.setDone()
return i.buf[:1]
}
if i.rb.src.str[p] < utf8.RuneSelf {
i.buf[0] = i.rb.src.str[i.p]
i.p = p
return i.buf[:1]
}
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
func nextHangul(i *Iter) []byte {
p := i.p
next := p + hangulUTF8Size
if next >= i.rb.nsrc {
i.setDone()
} else if i.rb.src.hangul(next) == 0 {
i.rb.ss.next(i.info)
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
i.p = next
return i.buf[:decomposeHangul(i.buf[:], i.rb.src.hangul(p))]
}
func nextDone(i *Iter) []byte {
return nil
}
// nextMulti is used for iterating over multi-segment decompositions
// for decomposing normal forms.
func nextMulti(i *Iter) []byte {
j := 0
d := i.multiSeg
// skip first rune
for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ {
}
for j < len(d) {
info := i.rb.f.info(input{bytes: d}, j)
if info.BoundaryBefore() {
i.multiSeg = d[j:]
return d[:j]
}
j += int(info.size)
}
// treat last segment as normal decomposition
i.next = i.rb.f.nextMain
return i.next(i)
}
// nextMultiNorm is used for iterating over multi-segment decompositions
// for composing normal forms.
func nextMultiNorm(i *Iter) []byte {
j := 0
d := i.multiSeg
for j < len(d) {
info := i.rb.f.info(input{bytes: d}, j)
if info.BoundaryBefore() {
i.rb.compose()
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
i.rb.insertUnsafe(input{bytes: d}, j, info)
i.multiSeg = d[j+int(info.size):]
return seg
}
i.rb.insertUnsafe(input{bytes: d}, j, info)
j += int(info.size)
}
i.multiSeg = nil
i.next = nextComposed
return doNormComposed(i)
}
// nextDecomposed is the implementation of Next for forms NFD and NFKD.
func nextDecomposed(i *Iter) (next []byte) {
outp := 0
inCopyStart, outCopyStart := i.p, 0
for {
if sz := int(i.info.size); sz <= 1 {
i.rb.ss = 0
p := i.p
i.p++ // ASCII or illegal byte. Either way, advance by 1.
if i.p >= i.rb.nsrc {
i.setDone()
return i.returnSlice(p, i.p)
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
i.next = i.asciiF
return i.returnSlice(p, i.p)
}
outp++
} else if d := i.info.Decomposition(); d != nil {
// Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero.
// Case 1: there is a leftover to copy. In this case the decomposition
// must begin with a modifier and should always be appended.
// Case 2: no leftover. Simply return d if followed by a ccc == 0 value.
p := outp + len(d)
if outp > 0 {
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
// TODO: this condition should not be possible, but we leave it
// in for defensive purposes.
if p > len(i.buf) {
return i.buf[:outp]
}
} else if i.info.multiSegment() {
// outp must be 0 as multi-segment decompositions always
// start a new segment.
if i.multiSeg == nil {
i.multiSeg = d
i.next = nextMulti
return nextMulti(i)
}
// We are in the last segment. Treat as normal decomposition.
d = i.multiSeg
i.multiSeg = nil
p = len(d)
}
prevCC := i.info.tccc
if i.p += sz; i.p >= i.rb.nsrc {
i.setDone()
i.info = Properties{} // Force BoundaryBefore to succeed.
} else {
i.info = i.rb.f.info(i.rb.src, i.p)
}
switch i.rb.ss.next(i.info) {
case ssOverflow:
i.next = nextCGJDecompose
fallthrough
case ssStarter:
if outp > 0 {
copy(i.buf[outp:], d)
return i.buf[:p]
}
return d
}
copy(i.buf[outp:], d)
outp = p
inCopyStart, outCopyStart = i.p, outp
if i.info.ccc < prevCC {
goto doNorm
}
continue
} else if r := i.rb.src.hangul(i.p); r != 0 {
outp = decomposeHangul(i.buf[:], r)
i.p += hangulUTF8Size
inCopyStart, outCopyStart = i.p, outp
if i.p >= i.rb.nsrc {
i.setDone()
break
} else if i.rb.src.hangul(i.p) != 0 {
i.next = nextHangul
return i.buf[:outp]
}
} else {
p := outp + sz
if p > len(i.buf) {
break
}
outp = p
i.p += sz
}
if i.p >= i.rb.nsrc {
i.setDone()
break
}
prevCC := i.info.tccc
i.info = i.rb.f.info(i.rb.src, i.p)
if v := i.rb.ss.next(i.info); v == ssStarter {
break
} else if v == ssOverflow {
i.next = nextCGJDecompose
break
}
if i.info.ccc < prevCC {
goto doNorm
}
}
if outCopyStart == 0 {
return i.returnSlice(inCopyStart, i.p)
} else if inCopyStart < i.p {
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
}
return i.buf[:outp]
doNorm:
// Insert what we have decomposed so far in the reorderBuffer.
// As we will only reorder, there will always be enough room.
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
i.rb.insertDecomposed(i.buf[0:outp])
return doNormDecomposed(i)
}
func doNormDecomposed(i *Iter) []byte {
for {
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
i.setDone()
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if i.info.ccc == 0 {
break
}
if s := i.rb.ss.next(i.info); s == ssOverflow {
i.next = nextCGJDecompose
break
}
}
// new segment or too many combining characters: exit normalization
return i.buf[:i.rb.flushCopy(i.buf[:])]
}
func nextCGJDecompose(i *Iter) []byte {
i.rb.ss = 0
i.rb.insertCGJ()
i.next = nextDecomposed
i.rb.ss.first(i.info)
buf := doNormDecomposed(i)
return buf
}
// nextComposed is the implementation of Next for forms NFC and NFKC.
func nextComposed(i *Iter) []byte {
outp, startp := 0, i.p
var prevCC uint8
for {
if !i.info.isYesC() {
goto doNorm
}
prevCC = i.info.tccc
sz := int(i.info.size)
if sz == 0 {
sz = 1 // illegal rune: copy byte-by-byte
}
p := outp + sz
if p > len(i.buf) {
break
}
outp = p
i.p += sz
if i.p >= i.rb.nsrc {
i.setDone()
break
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
i.rb.ss = 0
i.next = i.asciiF
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if v := i.rb.ss.next(i.info); v == ssStarter {
break
} else if v == ssOverflow {
i.next = nextCGJCompose
break
}
if i.info.ccc < prevCC {
goto doNorm
}
}
return i.returnSlice(startp, i.p)
doNorm:
// reset to start position
i.p = startp
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
if i.info.multiSegment() {
d := i.info.Decomposition()
info := i.rb.f.info(input{bytes: d}, 0)
i.rb.insertUnsafe(input{bytes: d}, 0, info)
i.multiSeg = d[int(info.size):]
i.next = nextMultiNorm
return nextMultiNorm(i)
}
i.rb.ss.first(i.info)
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
return doNormComposed(i)
}
func doNormComposed(i *Iter) []byte {
// First rune should already be inserted.
for {
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
i.setDone()
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if s := i.rb.ss.next(i.info); s == ssStarter {
break
} else if s == ssOverflow {
i.next = nextCGJCompose
break
}
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
}
i.rb.compose()
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
return seg
}
func nextCGJCompose(i *Iter) []byte {
i.rb.ss = 0 // instead of first
i.rb.insertCGJ()
i.next = nextComposed
// Note that we treat any rune with nLeadingNonStarters > 0 as a non-starter,
// even if they are not. This is particularly dubious for U+FF9E and UFF9A.
// If we ever change that, insert a check here.
i.rb.ss.first(i.info)
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
return doNormComposed(i)
}

986
vendor/golang.org/x/text/unicode/norm/maketables.go generated vendored Normal file
View file

@ -0,0 +1,986 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Normalization table generator.
// Data read from the web.
// See forminfo.go for a description of the trie values associated with each rune.
package main
import (
"bytes"
"encoding/binary"
"flag"
"fmt"
"io"
"log"
"sort"
"strconv"
"strings"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/triegen"
"golang.org/x/text/internal/ucd"
)
func main() {
gen.Init()
loadUnicodeData()
compactCCC()
loadCompositionExclusions()
completeCharFields(FCanonical)
completeCharFields(FCompatibility)
computeNonStarterCounts()
verifyComputed()
printChars()
testDerived()
printTestdata()
makeTables()
}
var (
tablelist = flag.String("tables",
"all",
"comma-separated list of which tables to generate; "+
"can be 'decomp', 'recomp', 'info' and 'all'")
test = flag.Bool("test",
false,
"test existing tables against DerivedNormalizationProps and generate test data for regression testing")
verbose = flag.Bool("verbose",
false,
"write data to stdout as it is parsed")
)
const MaxChar = 0x10FFFF // anything above this shouldn't exist
// Quick Check properties of runes allow us to quickly
// determine whether a rune may occur in a normal form.
// For a given normal form, a rune may be guaranteed to occur
// verbatim (QC=Yes), may or may not combine with another
// rune (QC=Maybe), or may not occur (QC=No).
type QCResult int
const (
QCUnknown QCResult = iota
QCYes
QCNo
QCMaybe
)
func (r QCResult) String() string {
switch r {
case QCYes:
return "Yes"
case QCNo:
return "No"
case QCMaybe:
return "Maybe"
}
return "***UNKNOWN***"
}
const (
FCanonical = iota // NFC or NFD
FCompatibility // NFKC or NFKD
FNumberOfFormTypes
)
const (
MComposed = iota // NFC or NFKC
MDecomposed // NFD or NFKD
MNumberOfModes
)
// This contains only the properties we're interested in.
type Char struct {
name string
codePoint rune // if zero, this index is not a valid code point.
ccc uint8 // canonical combining class
origCCC uint8
excludeInComp bool // from CompositionExclusions.txt
compatDecomp bool // it has a compatibility expansion
nTrailingNonStarters uint8
nLeadingNonStarters uint8 // must be equal to trailing if non-zero
forms [FNumberOfFormTypes]FormInfo // For FCanonical and FCompatibility
state State
}
var chars = make([]Char, MaxChar+1)
var cccMap = make(map[uint8]uint8)
func (c Char) String() string {
buf := new(bytes.Buffer)
fmt.Fprintf(buf, "%U [%s]:\n", c.codePoint, c.name)
fmt.Fprintf(buf, " ccc: %v\n", c.ccc)
fmt.Fprintf(buf, " excludeInComp: %v\n", c.excludeInComp)
fmt.Fprintf(buf, " compatDecomp: %v\n", c.compatDecomp)
fmt.Fprintf(buf, " state: %v\n", c.state)
fmt.Fprintf(buf, " NFC:\n")
fmt.Fprint(buf, c.forms[FCanonical])
fmt.Fprintf(buf, " NFKC:\n")
fmt.Fprint(buf, c.forms[FCompatibility])
return buf.String()
}
// In UnicodeData.txt, some ranges are marked like this:
// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
// 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
// parseCharacter keeps a state variable indicating the weirdness.
type State int
const (
SNormal State = iota // known to be zero for the type
SFirst
SLast
SMissing
)
var lastChar = rune('\u0000')
func (c Char) isValid() bool {
return c.codePoint != 0 && c.state != SMissing
}
type FormInfo struct {
quickCheck [MNumberOfModes]QCResult // index: MComposed or MDecomposed
verified [MNumberOfModes]bool // index: MComposed or MDecomposed
combinesForward bool // May combine with rune on the right
combinesBackward bool // May combine with rune on the left
isOneWay bool // Never appears in result
inDecomp bool // Some decompositions result in this char.
decomp Decomposition
expandedDecomp Decomposition
}
func (f FormInfo) String() string {
buf := bytes.NewBuffer(make([]byte, 0))
fmt.Fprintf(buf, " quickCheck[C]: %v\n", f.quickCheck[MComposed])
fmt.Fprintf(buf, " quickCheck[D]: %v\n", f.quickCheck[MDecomposed])
fmt.Fprintf(buf, " cmbForward: %v\n", f.combinesForward)
fmt.Fprintf(buf, " cmbBackward: %v\n", f.combinesBackward)
fmt.Fprintf(buf, " isOneWay: %v\n", f.isOneWay)
fmt.Fprintf(buf, " inDecomp: %v\n", f.inDecomp)
fmt.Fprintf(buf, " decomposition: %X\n", f.decomp)
fmt.Fprintf(buf, " expandedDecomp: %X\n", f.expandedDecomp)
return buf.String()
}
type Decomposition []rune
func parseDecomposition(s string, skipfirst bool) (a []rune, err error) {
decomp := strings.Split(s, " ")
if len(decomp) > 0 && skipfirst {
decomp = decomp[1:]
}
for _, d := range decomp {
point, err := strconv.ParseUint(d, 16, 64)
if err != nil {
return a, err
}
a = append(a, rune(point))
}
return a, nil
}
func loadUnicodeData() {
f := gen.OpenUCDFile("UnicodeData.txt")
defer f.Close()
p := ucd.New(f)
for p.Next() {
r := p.Rune(ucd.CodePoint)
char := &chars[r]
char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass))
decmap := p.String(ucd.DecompMapping)
exp, err := parseDecomposition(decmap, false)
isCompat := false
if err != nil {
if len(decmap) > 0 {
exp, err = parseDecomposition(decmap, true)
if err != nil {
log.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err)
}
isCompat = true
}
}
char.name = p.String(ucd.Name)
char.codePoint = r
char.forms[FCompatibility].decomp = exp
if !isCompat {
char.forms[FCanonical].decomp = exp
} else {
char.compatDecomp = true
}
if len(decmap) > 0 {
char.forms[FCompatibility].decomp = exp
}
}
if err := p.Err(); err != nil {
log.Fatal(err)
}
}
// compactCCC converts the sparse set of CCC values to a continguous one,
// reducing the number of bits needed from 8 to 6.
func compactCCC() {
m := make(map[uint8]uint8)
for i := range chars {
c := &chars[i]
m[c.ccc] = 0
}
cccs := []int{}
for v, _ := range m {
cccs = append(cccs, int(v))
}
sort.Ints(cccs)
for i, c := range cccs {
cccMap[uint8(i)] = uint8(c)
m[uint8(c)] = uint8(i)
}
for i := range chars {
c := &chars[i]
c.origCCC = c.ccc
c.ccc = m[c.ccc]
}
if len(m) >= 1<<6 {
log.Fatalf("too many difference CCC values: %d >= 64", len(m))
}
}
// CompositionExclusions.txt has form:
// 0958 # ...
// See https://unicode.org/reports/tr44/ for full explanation
func loadCompositionExclusions() {
f := gen.OpenUCDFile("CompositionExclusions.txt")
defer f.Close()
p := ucd.New(f)
for p.Next() {
c := &chars[p.Rune(0)]
if c.excludeInComp {
log.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint)
}
c.excludeInComp = true
}
if e := p.Err(); e != nil {
log.Fatal(e)
}
}
// hasCompatDecomp returns true if any of the recursive
// decompositions contains a compatibility expansion.
// In this case, the character may not occur in NFK*.
func hasCompatDecomp(r rune) bool {
c := &chars[r]
if c.compatDecomp {
return true
}
for _, d := range c.forms[FCompatibility].decomp {
if hasCompatDecomp(d) {
return true
}
}
return false
}
// Hangul related constants.
const (
HangulBase = 0xAC00
HangulEnd = 0xD7A4 // hangulBase + Jamo combinations (19 * 21 * 28)
JamoLBase = 0x1100
JamoLEnd = 0x1113
JamoVBase = 0x1161
JamoVEnd = 0x1176
JamoTBase = 0x11A8
JamoTEnd = 0x11C3
JamoLVTCount = 19 * 21 * 28
JamoTCount = 28
)
func isHangul(r rune) bool {
return HangulBase <= r && r < HangulEnd
}
func isHangulWithoutJamoT(r rune) bool {
if !isHangul(r) {
return false
}
r -= HangulBase
return r < JamoLVTCount && r%JamoTCount == 0
}
func ccc(r rune) uint8 {
return chars[r].ccc
}
// Insert a rune in a buffer, ordered by Canonical Combining Class.
func insertOrdered(b Decomposition, r rune) Decomposition {
n := len(b)
b = append(b, 0)
cc := ccc(r)
if cc > 0 {
// Use bubble sort.
for ; n > 0; n-- {
if ccc(b[n-1]) <= cc {
break
}
b[n] = b[n-1]
}
}
b[n] = r
return b
}
// Recursively decompose.
func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
dcomp := chars[r].forms[form].decomp
if len(dcomp) == 0 {
return insertOrdered(d, r)
}
for _, c := range dcomp {
d = decomposeRecursive(form, c, d)
}
return d
}
func completeCharFields(form int) {
// Phase 0: pre-expand decomposition.
for i := range chars {
f := &chars[i].forms[form]
if len(f.decomp) == 0 {
continue
}
exp := make(Decomposition, 0)
for _, c := range f.decomp {
exp = decomposeRecursive(form, c, exp)
}
f.expandedDecomp = exp
}
// Phase 1: composition exclusion, mark decomposition.
for i := range chars {
c := &chars[i]
f := &c.forms[form]
// Marks script-specific exclusions and version restricted.
f.isOneWay = c.excludeInComp
// Singletons
f.isOneWay = f.isOneWay || len(f.decomp) == 1
// Non-starter decompositions
if len(f.decomp) > 1 {
chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0
f.isOneWay = f.isOneWay || chk
}
// Runes that decompose into more than two runes.
f.isOneWay = f.isOneWay || len(f.decomp) > 2
if form == FCompatibility {
f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
}
for _, r := range f.decomp {
chars[r].forms[form].inDecomp = true
}
}
// Phase 2: forward and backward combining.
for i := range chars {
c := &chars[i]
f := &c.forms[form]
if !f.isOneWay && len(f.decomp) == 2 {
f0 := &chars[f.decomp[0]].forms[form]
f1 := &chars[f.decomp[1]].forms[form]
if !f0.isOneWay {
f0.combinesForward = true
}
if !f1.isOneWay {
f1.combinesBackward = true
}
}
if isHangulWithoutJamoT(rune(i)) {
f.combinesForward = true
}
}
// Phase 3: quick check values.
for i := range chars {
c := &chars[i]
f := &c.forms[form]
switch {
case len(f.decomp) > 0:
f.quickCheck[MDecomposed] = QCNo
case isHangul(rune(i)):
f.quickCheck[MDecomposed] = QCNo
default:
f.quickCheck[MDecomposed] = QCYes
}
switch {
case f.isOneWay:
f.quickCheck[MComposed] = QCNo
case (i & 0xffff00) == JamoLBase:
f.quickCheck[MComposed] = QCYes
if JamoLBase <= i && i < JamoLEnd {
f.combinesForward = true
}
if JamoVBase <= i && i < JamoVEnd {
f.quickCheck[MComposed] = QCMaybe
f.combinesBackward = true
f.combinesForward = true
}
if JamoTBase <= i && i < JamoTEnd {
f.quickCheck[MComposed] = QCMaybe
f.combinesBackward = true
}
case !f.combinesBackward:
f.quickCheck[MComposed] = QCYes
default:
f.quickCheck[MComposed] = QCMaybe
}
}
}
func computeNonStarterCounts() {
// Phase 4: leading and trailing non-starter count
for i := range chars {
c := &chars[i]
runes := []rune{rune(i)}
// We always use FCompatibility so that the CGJ insertion points do not
// change for repeated normalizations with different forms.
if exp := c.forms[FCompatibility].expandedDecomp; len(exp) > 0 {
runes = exp
}
// We consider runes that combine backwards to be non-starters for the
// purpose of Stream-Safe Text Processing.
for _, r := range runes {
if cr := &chars[r]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
break
}
c.nLeadingNonStarters++
}
for i := len(runes) - 1; i >= 0; i-- {
if cr := &chars[runes[i]]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
break
}
c.nTrailingNonStarters++
}
if c.nTrailingNonStarters > 3 {
log.Fatalf("%U: Decomposition with more than 3 (%d) trailing modifiers (%U)", i, c.nTrailingNonStarters, runes)
}
if isHangul(rune(i)) {
c.nTrailingNonStarters = 2
if isHangulWithoutJamoT(rune(i)) {
c.nTrailingNonStarters = 1
}
}
if l, t := c.nLeadingNonStarters, c.nTrailingNonStarters; l > 0 && l != t {
log.Fatalf("%U: number of leading and trailing non-starters should be equal (%d vs %d)", i, l, t)
}
if t := c.nTrailingNonStarters; t > 3 {
log.Fatalf("%U: number of trailing non-starters is %d > 3", t)
}
}
}
func printBytes(w io.Writer, b []byte, name string) {
fmt.Fprintf(w, "// %s: %d bytes\n", name, len(b))
fmt.Fprintf(w, "var %s = [...]byte {", name)
for i, c := range b {
switch {
case i%64 == 0:
fmt.Fprintf(w, "\n// Bytes %x - %x\n", i, i+63)
case i%8 == 0:
fmt.Fprintf(w, "\n")
}
fmt.Fprintf(w, "0x%.2X, ", c)
}
fmt.Fprint(w, "\n}\n\n")
}
// See forminfo.go for format.
func makeEntry(f *FormInfo, c *Char) uint16 {
e := uint16(0)
if r := c.codePoint; HangulBase <= r && r < HangulEnd {
e |= 0x40
}
if f.combinesForward {
e |= 0x20
}
if f.quickCheck[MDecomposed] == QCNo {
e |= 0x4
}
switch f.quickCheck[MComposed] {
case QCYes:
case QCNo:
e |= 0x10
case QCMaybe:
e |= 0x18
default:
log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed])
}
e |= uint16(c.nTrailingNonStarters)
return e
}
// decompSet keeps track of unique decompositions, grouped by whether
// the decomposition is followed by a trailing and/or leading CCC.
type decompSet [7]map[string]bool
const (
normalDecomp = iota
firstMulti
firstCCC
endMulti
firstLeadingCCC
firstCCCZeroExcept
firstStarterWithNLead
lastDecomp
)
var cname = []string{"firstMulti", "firstCCC", "endMulti", "firstLeadingCCC", "firstCCCZeroExcept", "firstStarterWithNLead", "lastDecomp"}
func makeDecompSet() decompSet {
m := decompSet{}
for i := range m {
m[i] = make(map[string]bool)
}
return m
}
func (m *decompSet) insert(key int, s string) {
m[key][s] = true
}
func printCharInfoTables(w io.Writer) int {
mkstr := func(r rune, f *FormInfo) (int, string) {
d := f.expandedDecomp
s := string([]rune(d))
if max := 1 << 6; len(s) >= max {
const msg = "%U: too many bytes in decomposition: %d >= %d"
log.Fatalf(msg, r, len(s), max)
}
head := uint8(len(s))
if f.quickCheck[MComposed] != QCYes {
head |= 0x40
}
if f.combinesForward {
head |= 0x80
}
s = string([]byte{head}) + s
lccc := ccc(d[0])
tccc := ccc(d[len(d)-1])
cc := ccc(r)
if cc != 0 && lccc == 0 && tccc == 0 {
log.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", r, cc)
}
if tccc < lccc && lccc != 0 {
const msg = "%U: lccc (%d) must be <= tcc (%d)"
log.Fatalf(msg, r, lccc, tccc)
}
index := normalDecomp
nTrail := chars[r].nTrailingNonStarters
nLead := chars[r].nLeadingNonStarters
if tccc > 0 || lccc > 0 || nTrail > 0 {
tccc <<= 2
tccc |= nTrail
s += string([]byte{tccc})
index = endMulti
for _, r := range d[1:] {
if ccc(r) == 0 {
index = firstCCC
}
}
if lccc > 0 || nLead > 0 {
s += string([]byte{lccc})
if index == firstCCC {
log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r)
}
index = firstLeadingCCC
}
if cc != lccc {
if cc != 0 {
log.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc)
}
index = firstCCCZeroExcept
}
} else if len(d) > 1 {
index = firstMulti
}
return index, s
}
decompSet := makeDecompSet()
const nLeadStr = "\x00\x01" // 0-byte length and tccc with nTrail.
decompSet.insert(firstStarterWithNLead, nLeadStr)
// Store the uniqued decompositions in a byte buffer,
// preceded by their byte length.
for _, c := range chars {
for _, f := range c.forms {
if len(f.expandedDecomp) == 0 {
continue
}
if f.combinesBackward {
log.Fatalf("%U: combinesBackward and decompose", c.codePoint)
}
index, s := mkstr(c.codePoint, &f)
decompSet.insert(index, s)
}
}
decompositions := bytes.NewBuffer(make([]byte, 0, 10000))
size := 0
positionMap := make(map[string]uint16)
decompositions.WriteString("\000")
fmt.Fprintln(w, "const (")
for i, m := range decompSet {
sa := []string{}
for s := range m {
sa = append(sa, s)
}
sort.Strings(sa)
for _, s := range sa {
p := decompositions.Len()
decompositions.WriteString(s)
positionMap[s] = uint16(p)
}
if cname[i] != "" {
fmt.Fprintf(w, "%s = 0x%X\n", cname[i], decompositions.Len())
}
}
fmt.Fprintln(w, "maxDecomp = 0x8000")
fmt.Fprintln(w, ")")
b := decompositions.Bytes()
printBytes(w, b, "decomps")
size += len(b)
varnames := []string{"nfc", "nfkc"}
for i := 0; i < FNumberOfFormTypes; i++ {
trie := triegen.NewTrie(varnames[i])
for r, c := range chars {
f := c.forms[i]
d := f.expandedDecomp
if len(d) != 0 {
_, key := mkstr(c.codePoint, &f)
trie.Insert(rune(r), uint64(positionMap[key]))
if c.ccc != ccc(d[0]) {
// We assume the lead ccc of a decomposition !=0 in this case.
if ccc(d[0]) == 0 {
log.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc)
}
}
} else if c.nLeadingNonStarters > 0 && len(f.expandedDecomp) == 0 && c.ccc == 0 && !f.combinesBackward {
// Handle cases where it can't be detected that the nLead should be equal
// to nTrail.
trie.Insert(c.codePoint, uint64(positionMap[nLeadStr]))
} else if v := makeEntry(&f, &c)<<8 | uint16(c.ccc); v != 0 {
trie.Insert(c.codePoint, uint64(0x8000|v))
}
}
sz, err := trie.Gen(w, triegen.Compact(&normCompacter{name: varnames[i]}))
if err != nil {
log.Fatal(err)
}
size += sz
}
return size
}
func contains(sa []string, s string) bool {
for _, a := range sa {
if a == s {
return true
}
}
return false
}
func makeTables() {
w := &bytes.Buffer{}
size := 0
if *tablelist == "" {
return
}
list := strings.Split(*tablelist, ",")
if *tablelist == "all" {
list = []string{"recomp", "info"}
}
// Compute maximum decomposition size.
max := 0
for _, c := range chars {
if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max {
max = n
}
}
fmt.Fprintln(w, `import "sync"`)
fmt.Fprintln(w)
fmt.Fprintln(w, "const (")
fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.")
fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion())
fmt.Fprintln(w)
fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform")
fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at")
fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that")
fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.")
fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(0x034F))+max)
fmt.Fprintln(w, ")\n")
// Print the CCC remap table.
size += len(cccMap)
fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap))
for i := 0; i < len(cccMap); i++ {
if i%8 == 0 {
fmt.Fprintln(w)
}
fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)])
}
fmt.Fprintln(w, "\n}\n")
if contains(list, "info") {
size += printCharInfoTables(w)
}
if contains(list, "recomp") {
// Note that we use 32 bit keys, instead of 64 bit.
// This clips the bits of three entries, but we know
// this won't cause a collision. The compiler will catch
// any changes made to UnicodeData.txt that introduces
// a collision.
// Note that the recomposition map for NFC and NFKC
// are identical.
// Recomposition map
nrentries := 0
for _, c := range chars {
f := c.forms[FCanonical]
if !f.isOneWay && len(f.decomp) > 0 {
nrentries++
}
}
sz := nrentries * 8
size += sz
fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz)
fmt.Fprintln(w, "var recompMap map[uint32]rune")
fmt.Fprintln(w, "var recompMapOnce sync.Once\n")
fmt.Fprintln(w, `const recompMapPacked = "" +`)
var buf [8]byte
for i, c := range chars {
f := c.forms[FCanonical]
d := f.decomp
if !f.isOneWay && len(d) > 0 {
key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1]))
binary.BigEndian.PutUint32(buf[:4], key)
binary.BigEndian.PutUint32(buf[4:], uint32(i))
fmt.Fprintf(w, "\t\t%q + // 0x%.8X: 0x%.8X\n", string(buf[:]), key, uint32(i))
}
}
// hack so we don't have to special case the trailing plus sign
fmt.Fprintf(w, ` ""`)
fmt.Fprintln(w)
}
fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
gen.WriteVersionedGoFile("tables.go", "norm", w.Bytes())
}
func printChars() {
if *verbose {
for _, c := range chars {
if !c.isValid() || c.state == SMissing {
continue
}
fmt.Println(c)
}
}
}
// verifyComputed does various consistency tests.
func verifyComputed() {
for i, c := range chars {
for _, f := range c.forms {
isNo := (f.quickCheck[MDecomposed] == QCNo)
if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
log.Fatalf("%U: NF*D QC must be No if rune decomposes", i)
}
isMaybe := f.quickCheck[MComposed] == QCMaybe
if f.combinesBackward != isMaybe {
log.Fatalf("%U: NF*C QC must be Maybe if combinesBackward", i)
}
if len(f.decomp) > 0 && f.combinesForward && isMaybe {
log.Fatalf("%U: NF*C QC must be Yes or No if combinesForward and decomposes", i)
}
if len(f.expandedDecomp) != 0 {
continue
}
if a, b := c.nLeadingNonStarters > 0, (c.ccc > 0 || f.combinesBackward); a != b {
// We accept these runes to be treated differently (it only affects
// segment breaking in iteration, most likely on improper use), but
// reconsider if more characters are added.
// U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK;Lm;0;L;<narrow> 3099;;;;N;;;;;
// U+FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK;Lm;0;L;<narrow> 309A;;;;N;;;;;
// U+3133 HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<compat> 11AA;;;;N;HANGUL LETTER GIYEOG SIOS;;;;
// U+318E HANGUL LETTER ARAEAE;Lo;0;L;<compat> 11A1;;;;N;HANGUL LETTER ALAE AE;;;;
// U+FFA3 HALFWIDTH HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<narrow> 3133;;;;N;HALFWIDTH HANGUL LETTER GIYEOG SIOS;;;;
// U+FFDC HALFWIDTH HANGUL LETTER I;Lo;0;L;<narrow> 3163;;;;N;;;;;
if i != 0xFF9E && i != 0xFF9F && !(0x3133 <= i && i <= 0x318E) && !(0xFFA3 <= i && i <= 0xFFDC) {
log.Fatalf("%U: nLead was %v; want %v", i, a, b)
}
}
}
nfc := c.forms[FCanonical]
nfkc := c.forms[FCompatibility]
if nfc.combinesBackward != nfkc.combinesBackward {
log.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint)
}
}
}
// Use values in DerivedNormalizationProps.txt to compare against the
// values we computed.
// DerivedNormalizationProps.txt has form:
// 00C0..00C5 ; NFD_QC; N # ...
// 0374 ; NFD_QC; N # ...
// See https://unicode.org/reports/tr44/ for full explanation
func testDerived() {
f := gen.OpenUCDFile("DerivedNormalizationProps.txt")
defer f.Close()
p := ucd.New(f)
for p.Next() {
r := p.Rune(0)
c := &chars[r]
var ftype, mode int
qt := p.String(1)
switch qt {
case "NFC_QC":
ftype, mode = FCanonical, MComposed
case "NFD_QC":
ftype, mode = FCanonical, MDecomposed
case "NFKC_QC":
ftype, mode = FCompatibility, MComposed
case "NFKD_QC":
ftype, mode = FCompatibility, MDecomposed
default:
continue
}
var qr QCResult
switch p.String(2) {
case "Y":
qr = QCYes
case "N":
qr = QCNo
case "M":
qr = QCMaybe
default:
log.Fatalf(`Unexpected quick check value "%s"`, p.String(2))
}
if got := c.forms[ftype].quickCheck[mode]; got != qr {
log.Printf("%U: FAILED %s (was %v need %v)\n", r, qt, got, qr)
}
c.forms[ftype].verified[mode] = true
}
if err := p.Err(); err != nil {
log.Fatal(err)
}
// Any unspecified value must be QCYes. Verify this.
for i, c := range chars {
for j, fd := range c.forms {
for k, qr := range fd.quickCheck {
if !fd.verified[k] && qr != QCYes {
m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n"
log.Printf(m, i, j, k, qr, c.name)
}
}
}
}
}
var testHeader = `const (
Yes = iota
No
Maybe
)
type formData struct {
qc uint8
combinesForward bool
decomposition string
}
type runeData struct {
r rune
ccc uint8
nLead uint8
nTrail uint8
f [2]formData // 0: canonical; 1: compatibility
}
func f(qc uint8, cf bool, dec string) [2]formData {
return [2]formData{{qc, cf, dec}, {qc, cf, dec}}
}
func g(qc, qck uint8, cf, cfk bool, d, dk string) [2]formData {
return [2]formData{{qc, cf, d}, {qck, cfk, dk}}
}
var testData = []runeData{
`
func printTestdata() {
type lastInfo struct {
ccc uint8
nLead uint8
nTrail uint8
f string
}
last := lastInfo{}
w := &bytes.Buffer{}
fmt.Fprintf(w, testHeader)
for r, c := range chars {
f := c.forms[FCanonical]
qc, cf, d := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
f = c.forms[FCompatibility]
qck, cfk, dk := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
s := ""
if d == dk && qc == qck && cf == cfk {
s = fmt.Sprintf("f(%s, %v, %q)", qc, cf, d)
} else {
s = fmt.Sprintf("g(%s, %s, %v, %v, %q, %q)", qc, qck, cf, cfk, d, dk)
}
current := lastInfo{c.ccc, c.nLeadingNonStarters, c.nTrailingNonStarters, s}
if last != current {
fmt.Fprintf(w, "\t{0x%x, %d, %d, %d, %s},\n", r, c.origCCC, c.nLeadingNonStarters, c.nTrailingNonStarters, s)
last = current
}
}
fmt.Fprintln(w, "}")
gen.WriteVersionedGoFile("data_test.go", "norm", w.Bytes())
}

609
vendor/golang.org/x/text/unicode/norm/normalize.go generated vendored Normal file
View file

@ -0,0 +1,609 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Note: the file data_test.go that is generated should not be checked in.
//go:generate go run maketables.go triegen.go
//go:generate go test -tags test
// Package norm contains types and functions for normalizing Unicode strings.
package norm // import "golang.org/x/text/unicode/norm"
import (
"unicode/utf8"
"golang.org/x/text/transform"
)
// A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are:
//
// NFC Unicode Normalization Form C
// NFD Unicode Normalization Form D
// NFKC Unicode Normalization Form KC
// NFKD Unicode Normalization Form KD
//
// For a Form f, this documentation uses the notation f(x) to mean
// the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides:
// f(x) == append(f(x[0:n]), f(x[n:])...)
//
// References: https://unicode.org/reports/tr15/ and
// https://unicode.org/notes/tn5/.
type Form int
const (
NFC Form = iota
NFD
NFKC
NFKD
)
// Bytes returns f(b). May return b if f(b) = b.
func (f Form) Bytes(b []byte) []byte {
src := inputBytes(b)
ft := formTable[f]
n, ok := ft.quickSpan(src, 0, len(b), true)
if ok {
return b
}
out := make([]byte, n, len(b))
copy(out, b[0:n])
rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush}
return doAppendInner(&rb, n)
}
// String returns f(s).
func (f Form) String(s string) string {
src := inputString(s)
ft := formTable[f]
n, ok := ft.quickSpan(src, 0, len(s), true)
if ok {
return s
}
out := make([]byte, n, len(s))
copy(out, s[0:n])
rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush}
return string(doAppendInner(&rb, n))
}
// IsNormal returns true if b == f(b).
func (f Form) IsNormal(b []byte) bool {
src := inputBytes(b)
ft := formTable[f]
bp, ok := ft.quickSpan(src, 0, len(b), true)
if ok {
return true
}
rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)}
rb.setFlusher(nil, cmpNormalBytes)
for bp < len(b) {
rb.out = b[bp:]
if bp = decomposeSegment(&rb, bp, true); bp < 0 {
return false
}
bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true)
}
return true
}
func cmpNormalBytes(rb *reorderBuffer) bool {
b := rb.out
for i := 0; i < rb.nrune; i++ {
info := rb.rune[i]
if int(info.size) > len(b) {
return false
}
p := info.pos
pe := p + info.size
for ; p < pe; p++ {
if b[0] != rb.byte[p] {
return false
}
b = b[1:]
}
}
return true
}
// IsNormalString returns true if s == f(s).
func (f Form) IsNormalString(s string) bool {
src := inputString(s)
ft := formTable[f]
bp, ok := ft.quickSpan(src, 0, len(s), true)
if ok {
return true
}
rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
rb.setFlusher(nil, func(rb *reorderBuffer) bool {
for i := 0; i < rb.nrune; i++ {
info := rb.rune[i]
if bp+int(info.size) > len(s) {
return false
}
p := info.pos
pe := p + info.size
for ; p < pe; p++ {
if s[bp] != rb.byte[p] {
return false
}
bp++
}
}
return true
})
for bp < len(s) {
if bp = decomposeSegment(&rb, bp, true); bp < 0 {
return false
}
bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true)
}
return true
}
// patchTail fixes a case where a rune may be incorrectly normalized
// if it is followed by illegal continuation bytes. It returns the
// patched buffer and whether the decomposition is still in progress.
func patchTail(rb *reorderBuffer) bool {
info, p := lastRuneStart(&rb.f, rb.out)
if p == -1 || info.size == 0 {
return true
}
end := p + int(info.size)
extra := len(rb.out) - end
if extra > 0 {
// Potentially allocating memory. However, this only
// happens with ill-formed UTF-8.
x := make([]byte, 0)
x = append(x, rb.out[len(rb.out)-extra:]...)
rb.out = rb.out[:end]
decomposeToLastBoundary(rb)
rb.doFlush()
rb.out = append(rb.out, x...)
return false
}
buf := rb.out[p:]
rb.out = rb.out[:p]
decomposeToLastBoundary(rb)
if s := rb.ss.next(info); s == ssStarter {
rb.doFlush()
rb.ss.first(info)
} else if s == ssOverflow {
rb.doFlush()
rb.insertCGJ()
rb.ss = 0
}
rb.insertUnsafe(inputBytes(buf), 0, info)
return true
}
func appendQuick(rb *reorderBuffer, i int) int {
if rb.nsrc == i {
return i
}
end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true)
rb.out = rb.src.appendSlice(rb.out, i, end)
return end
}
// Append returns f(append(out, b...)).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) Append(out []byte, src ...byte) []byte {
return f.doAppend(out, inputBytes(src), len(src))
}
func (f Form) doAppend(out []byte, src input, n int) []byte {
if n == 0 {
return out
}
ft := formTable[f]
// Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer.
if len(out) == 0 {
p, _ := ft.quickSpan(src, 0, n, true)
out = src.appendSlice(out, 0, p)
if p == n {
return out
}
rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush}
return doAppendInner(&rb, p)
}
rb := reorderBuffer{f: *ft, src: src, nsrc: n}
return doAppend(&rb, out, 0)
}
func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
rb.setFlusher(out, appendFlush)
src, n := rb.src, rb.nsrc
doMerge := len(out) > 0
if q := src.skipContinuationBytes(p); q > p {
// Move leading non-starters to destination.
rb.out = src.appendSlice(rb.out, p, q)
p = q
doMerge = patchTail(rb)
}
fd := &rb.f
if doMerge {
var info Properties
if p < n {
info = fd.info(src, p)
if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 {
if p == 0 {
decomposeToLastBoundary(rb)
}
p = decomposeSegment(rb, p, true)
}
}
if info.size == 0 {
rb.doFlush()
// Append incomplete UTF-8 encoding.
return src.appendSlice(rb.out, p, n)
}
if rb.nrune > 0 {
return doAppendInner(rb, p)
}
}
p = appendQuick(rb, p)
return doAppendInner(rb, p)
}
func doAppendInner(rb *reorderBuffer, p int) []byte {
for n := rb.nsrc; p < n; {
p = decomposeSegment(rb, p, true)
p = appendQuick(rb, p)
}
return rb.out
}
// AppendString returns f(append(out, []byte(s))).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) AppendString(out []byte, src string) []byte {
return f.doAppend(out, inputString(src), len(src))
}
// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpan(b []byte) int {
n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true)
return n
}
// Span implements transform.SpanningTransformer. It returns a boundary n such
// that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
if n < len(b) {
if !ok {
err = transform.ErrEndOfSpan
} else {
err = transform.ErrShortSrc
}
}
return n, err
}
// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
if n < len(s) {
if !ok {
err = transform.ErrEndOfSpan
} else {
err = transform.ErrShortSrc
}
}
return n, err
}
// quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and
// whether any non-normalized parts were found. If atEOF is false, n will
// not point past the last segment if this segment might be become
// non-normalized by appending other runes.
func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
var lastCC uint8
ss := streamSafe(0)
lastSegStart := i
for n = end; i < n; {
if j := src.skipASCII(i, n); i != j {
i = j
lastSegStart = i - 1
lastCC = 0
ss = 0
continue
}
info := f.info(src, i)
if info.size == 0 {
if atEOF {
// include incomplete runes
return n, true
}
return lastSegStart, true
}
// This block needs to be before the next, because it is possible to
// have an overflow for runes that are starters (e.g. with U+FF9E).
switch ss.next(info) {
case ssStarter:
lastSegStart = i
case ssOverflow:
return lastSegStart, false
case ssSuccess:
if lastCC > info.ccc {
return lastSegStart, false
}
}
if f.composing {
if !info.isYesC() {
break
}
} else {
if !info.isYesD() {
break
}
}
lastCC = info.ccc
i += int(info.size)
}
if i == n {
if !atEOF {
n = lastSegStart
}
return n, true
}
return lastSegStart, false
}
// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpanString(s string) int {
n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true)
return n
}
// FirstBoundary returns the position i of the first boundary in b
// or -1 if b contains no boundary.
func (f Form) FirstBoundary(b []byte) int {
return f.firstBoundary(inputBytes(b), len(b))
}
func (f Form) firstBoundary(src input, nsrc int) int {
i := src.skipContinuationBytes(0)
if i >= nsrc {
return -1
}
fd := formTable[f]
ss := streamSafe(0)
// We should call ss.first here, but we can't as the first rune is
// skipped already. This means FirstBoundary can't really determine
// CGJ insertion points correctly. Luckily it doesn't have to.
for {
info := fd.info(src, i)
if info.size == 0 {
return -1
}
if s := ss.next(info); s != ssSuccess {
return i
}
i += int(info.size)
if i >= nsrc {
if !info.BoundaryAfter() && !ss.isMax() {
return -1
}
return nsrc
}
}
}
// FirstBoundaryInString returns the position i of the first boundary in s
// or -1 if s contains no boundary.
func (f Form) FirstBoundaryInString(s string) int {
return f.firstBoundary(inputString(s), len(s))
}
// NextBoundary reports the index of the boundary between the first and next
// segment in b or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
func (f Form) NextBoundary(b []byte, atEOF bool) int {
return f.nextBoundary(inputBytes(b), len(b), atEOF)
}
// NextBoundaryInString reports the index of the boundary between the first and
// next segment in b or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
func (f Form) NextBoundaryInString(s string, atEOF bool) int {
return f.nextBoundary(inputString(s), len(s), atEOF)
}
func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
if nsrc == 0 {
if atEOF {
return 0
}
return -1
}
fd := formTable[f]
info := fd.info(src, 0)
if info.size == 0 {
if atEOF {
return 1
}
return -1
}
ss := streamSafe(0)
ss.first(info)
for i := int(info.size); i < nsrc; i += int(info.size) {
info = fd.info(src, i)
if info.size == 0 {
if atEOF {
return i
}
return -1
}
// TODO: Using streamSafe to determine the boundary isn't the same as
// using BoundaryBefore. Determine which should be used.
if s := ss.next(info); s != ssSuccess {
return i
}
}
if !atEOF && !info.BoundaryAfter() && !ss.isMax() {
return -1
}
return nsrc
}
// LastBoundary returns the position i of the last boundary in b
// or -1 if b contains no boundary.
func (f Form) LastBoundary(b []byte) int {
return lastBoundary(formTable[f], b)
}
func lastBoundary(fd *formInfo, b []byte) int {
i := len(b)
info, p := lastRuneStart(fd, b)
if p == -1 {
return -1
}
if info.size == 0 { // ends with incomplete rune
if p == 0 { // starts with incomplete rune
return -1
}
i = p
info, p = lastRuneStart(fd, b[:i])
if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
return i
}
}
if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
return i
}
if info.BoundaryAfter() {
return i
}
ss := streamSafe(0)
v := ss.backwards(info)
for i = p; i >= 0 && v != ssStarter; i = p {
info, p = lastRuneStart(fd, b[:i])
if v = ss.backwards(info); v == ssOverflow {
break
}
if p+int(info.size) != i {
if p == -1 { // no boundary found
return -1
}
return i // boundary after an illegal UTF-8 encoding
}
}
return i
}
// decomposeSegment scans the first segment in src into rb. It inserts 0x034f
// (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters
// and returns the number of bytes consumed from src or iShortDst or iShortSrc.
func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
// Force one character to be consumed.
info := rb.f.info(rb.src, sp)
if info.size == 0 {
return 0
}
if s := rb.ss.next(info); s == ssStarter {
// TODO: this could be removed if we don't support merging.
if rb.nrune > 0 {
goto end
}
} else if s == ssOverflow {
rb.insertCGJ()
goto end
}
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
return int(err)
}
for {
sp += int(info.size)
if sp >= rb.nsrc {
if !atEOF && !info.BoundaryAfter() {
return int(iShortSrc)
}
break
}
info = rb.f.info(rb.src, sp)
if info.size == 0 {
if !atEOF {
return int(iShortSrc)
}
break
}
if s := rb.ss.next(info); s == ssStarter {
break
} else if s == ssOverflow {
rb.insertCGJ()
break
}
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
return int(err)
}
}
end:
if !rb.doFlush() {
return int(iShortDst)
}
return sp
}
// lastRuneStart returns the runeInfo and position of the last
// rune in buf or the zero runeInfo and -1 if no rune was found.
func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
p := len(buf) - 1
for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
}
if p < 0 {
return Properties{}, -1
}
return fd.info(inputBytes(buf), p), p
}
// decomposeToLastBoundary finds an open segment at the end of the buffer
// and scans it into rb. Returns the buffer minus the last segment.
func decomposeToLastBoundary(rb *reorderBuffer) {
fd := &rb.f
info, i := lastRuneStart(fd, rb.out)
if int(info.size) != len(rb.out)-i {
// illegal trailing continuation bytes
return
}
if info.BoundaryAfter() {
return
}
var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order
padd := 0
ss := streamSafe(0)
p := len(rb.out)
for {
add[padd] = info
v := ss.backwards(info)
if v == ssOverflow {
// Note that if we have an overflow, it the string we are appending to
// is not correctly normalized. In this case the behavior is undefined.
break
}
padd++
p -= int(info.size)
if v == ssStarter || p < 0 {
break
}
info, i = lastRuneStart(fd, rb.out[:p])
if int(info.size) != p-i {
break
}
}
rb.ss = ss
// Copy bytes for insertion as we may need to overwrite rb.out.
var buf [maxBufferSize * utf8.UTFMax]byte
cp := buf[:copy(buf[:], rb.out[p:])]
rb.out = rb.out[:p]
for padd--; padd >= 0; padd-- {
info = add[padd]
rb.insertUnsafe(inputBytes(cp), 0, info)
cp = cp[info.size:]
}
}

125
vendor/golang.org/x/text/unicode/norm/readwriter.go generated vendored Normal file
View file

@ -0,0 +1,125 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "io"
type normWriter struct {
rb reorderBuffer
w io.Writer
buf []byte
}
// Write implements the standard write interface. If the last characters are
// not at a normalization boundary, the bytes will be buffered for the next
// write. The remaining bytes will be written on close.
func (w *normWriter) Write(data []byte) (n int, err error) {
// Process data in pieces to keep w.buf size bounded.
const chunk = 4000
for len(data) > 0 {
// Normalize into w.buf.
m := len(data)
if m > chunk {
m = chunk
}
w.rb.src = inputBytes(data[:m])
w.rb.nsrc = m
w.buf = doAppend(&w.rb, w.buf, 0)
data = data[m:]
n += m
// Write out complete prefix, save remainder.
// Note that lastBoundary looks back at most 31 runes.
i := lastBoundary(&w.rb.f, w.buf)
if i == -1 {
i = 0
}
if i > 0 {
if _, err = w.w.Write(w.buf[:i]); err != nil {
break
}
bn := copy(w.buf, w.buf[i:])
w.buf = w.buf[:bn]
}
}
return n, err
}
// Close forces data that remains in the buffer to be written.
func (w *normWriter) Close() error {
if len(w.buf) > 0 {
_, err := w.w.Write(w.buf)
if err != nil {
return err
}
}
return nil
}
// Writer returns a new writer that implements Write(b)
// by writing f(b) to w. The returned writer may use an
// internal buffer to maintain state across Write calls.
// Calling its Close method writes any buffered data to w.
func (f Form) Writer(w io.Writer) io.WriteCloser {
wr := &normWriter{rb: reorderBuffer{}, w: w}
wr.rb.init(f, nil)
return wr
}
type normReader struct {
rb reorderBuffer
r io.Reader
inbuf []byte
outbuf []byte
bufStart int
lastBoundary int
err error
}
// Read implements the standard read interface.
func (r *normReader) Read(p []byte) (int, error) {
for {
if r.lastBoundary-r.bufStart > 0 {
n := copy(p, r.outbuf[r.bufStart:r.lastBoundary])
r.bufStart += n
if r.lastBoundary-r.bufStart > 0 {
return n, nil
}
return n, r.err
}
if r.err != nil {
return 0, r.err
}
outn := copy(r.outbuf, r.outbuf[r.lastBoundary:])
r.outbuf = r.outbuf[0:outn]
r.bufStart = 0
n, err := r.r.Read(r.inbuf)
r.rb.src = inputBytes(r.inbuf[0:n])
r.rb.nsrc, r.err = n, err
if n > 0 {
r.outbuf = doAppend(&r.rb, r.outbuf, 0)
}
if err == io.EOF {
r.lastBoundary = len(r.outbuf)
} else {
r.lastBoundary = lastBoundary(&r.rb.f, r.outbuf)
if r.lastBoundary == -1 {
r.lastBoundary = 0
}
}
}
}
// Reader returns a new reader that implements Read
// by reading data from r and returning f(data).
func (f Form) Reader(r io.Reader) io.Reader {
const chunk = 4000
buf := make([]byte, chunk)
rr := &normReader{rb: reorderBuffer{}, r: r, inbuf: buf}
rr.rb.init(f, buf)
return rr
}

7657
vendor/golang.org/x/text/unicode/norm/tables10.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load diff

7693
vendor/golang.org/x/text/unicode/norm/tables11.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load diff

7637
vendor/golang.org/x/text/unicode/norm/tables9.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load diff

88
vendor/golang.org/x/text/unicode/norm/transform.go generated vendored Normal file
View file

@ -0,0 +1,88 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"unicode/utf8"
"golang.org/x/text/transform"
)
// Reset implements the Reset method of the transform.Transformer interface.
func (Form) Reset() {}
// Transform implements the Transform method of the transform.Transformer
// interface. It may need to write segments of up to MaxSegmentSize at once.
// Users should either catch ErrShortDst and allow dst to grow or have dst be at
// least of size MaxTransformChunkSize to be guaranteed of progress.
func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
// Cap the maximum number of src bytes to check.
b := src
eof := atEOF
if ns := len(dst); ns < len(b) {
err = transform.ErrShortDst
eof = false
b = b[:ns]
}
i, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), eof)
n := copy(dst, b[:i])
if !ok {
nDst, nSrc, err = f.transform(dst[n:], src[n:], atEOF)
return nDst + n, nSrc + n, err
}
if err == nil && n < len(src) && !atEOF {
err = transform.ErrShortSrc
}
return n, n, err
}
func flushTransform(rb *reorderBuffer) bool {
// Write out (must fully fit in dst, or else it is an ErrShortDst).
if len(rb.out) < rb.nrune*utf8.UTFMax {
return false
}
rb.out = rb.out[rb.flushCopy(rb.out):]
return true
}
var errs = []error{nil, transform.ErrShortDst, transform.ErrShortSrc}
// transform implements the transform.Transformer interface. It is only called
// when quickSpan does not pass for a given string.
func (f Form) transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
// TODO: get rid of reorderBuffer. See CL 23460044.
rb := reorderBuffer{}
rb.init(f, src)
for {
// Load segment into reorder buffer.
rb.setFlusher(dst[nDst:], flushTransform)
end := decomposeSegment(&rb, nSrc, atEOF)
if end < 0 {
return nDst, nSrc, errs[-end]
}
nDst = len(dst) - len(rb.out)
nSrc = end
// Next quickSpan.
end = rb.nsrc
eof := atEOF
if n := nSrc + len(dst) - nDst; n < end {
err = transform.ErrShortDst
end = n
eof = false
}
end, ok := rb.f.quickSpan(rb.src, nSrc, end, eof)
n := copy(dst[nDst:], rb.src.bytes[nSrc:end])
nSrc += n
nDst += n
if ok {
if err == nil && n < rb.nsrc && !atEOF {
err = transform.ErrShortSrc
}
return nDst, nSrc, err
}
}
}

54
vendor/golang.org/x/text/unicode/norm/trie.go generated vendored Normal file
View file

@ -0,0 +1,54 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
type valueRange struct {
value uint16 // header: value:stride
lo, hi byte // header: lo:n
}
type sparseBlocks struct {
values []valueRange
offset []uint16
}
var nfcSparse = sparseBlocks{
values: nfcSparseValues[:],
offset: nfcSparseOffset[:],
}
var nfkcSparse = sparseBlocks{
values: nfkcSparseValues[:],
offset: nfkcSparseOffset[:],
}
var (
nfcData = newNfcTrie(0)
nfkcData = newNfkcTrie(0)
)
// lookupValue determines the type of block n and looks up the value for b.
// For n < t.cutoff, the block is a simple lookup table. Otherwise, the block
// is a list of ranges with an accompanying value. Given a matching range r,
// the value for b is by r.value + (b - r.lo) * stride.
func (t *sparseBlocks) lookup(n uint32, b byte) uint16 {
offset := t.offset[n]
header := t.values[offset]
lo := offset + 1
hi := lo + uint16(header.lo)
for lo < hi {
m := lo + (hi-lo)/2
r := t.values[m]
if r.lo <= b && b <= r.hi {
return r.value + uint16(b-r.lo)*header.value
}
if b < r.lo {
hi = m
} else {
lo = m + 1
}
}
return 0
}

117
vendor/golang.org/x/text/unicode/norm/triegen.go generated vendored Normal file
View file

@ -0,0 +1,117 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Trie table generator.
// Used by make*tables tools to generate a go file with trie data structures
// for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
// sequence are used to lookup offsets in the index table to be used for the
// next byte. The last byte is used to index into a table with 16-bit values.
package main
import (
"fmt"
"io"
)
const maxSparseEntries = 16
type normCompacter struct {
sparseBlocks [][]uint64
sparseOffset []uint16
sparseCount int
name string
}
func mostFrequentStride(a []uint64) int {
counts := make(map[int]int)
var v int
for _, x := range a {
if stride := int(x) - v; v != 0 && stride >= 0 {
counts[stride]++
}
v = int(x)
}
var maxs, maxc int
for stride, cnt := range counts {
if cnt > maxc || (cnt == maxc && stride < maxs) {
maxs, maxc = stride, cnt
}
}
return maxs
}
func countSparseEntries(a []uint64) int {
stride := mostFrequentStride(a)
var v, count int
for _, tv := range a {
if int(tv)-v != stride {
if tv != 0 {
count++
}
}
v = int(tv)
}
return count
}
func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
if n := countSparseEntries(v); n <= maxSparseEntries {
return (n+1)*4 + 2, true
}
return 0, false
}
func (c *normCompacter) Store(v []uint64) uint32 {
h := uint32(len(c.sparseOffset))
c.sparseBlocks = append(c.sparseBlocks, v)
c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount))
c.sparseCount += countSparseEntries(v) + 1
return h
}
func (c *normCompacter) Handler() string {
return c.name + "Sparse.lookup"
}
func (c *normCompacter) Print(w io.Writer) (retErr error) {
p := func(f string, x ...interface{}) {
if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
retErr = err
}
}
ls := len(c.sparseBlocks)
p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2)
p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset)
ns := c.sparseCount
p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4)
p("var %sSparseValues = [%d]valueRange {", c.name, ns)
for i, b := range c.sparseBlocks {
p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
var v int
stride := mostFrequentStride(b)
n := countSparseEntries(b)
p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
for i, nv := range b {
if int(nv)-v != stride {
if v != 0 {
p(",hi:%#02x},", 0x80+i-1)
}
if nv != 0 {
p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
}
}
v = int(nv)
}
if v != 0 {
p(",hi:%#02x},", 0x80+len(b)-1)
}
}
p("\n}\n\n")
return
}

115
vendor/golang.org/x/text/unicode/rangetable/gen.go generated vendored Normal file
View file

@ -0,0 +1,115 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bytes"
"flag"
"fmt"
"io"
"log"
"reflect"
"strings"
"unicode"
"golang.org/x/text/collate"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/language"
"golang.org/x/text/unicode/rangetable"
)
var versionList = flag.String("versions", "",
"list of versions for which to generate RangeTables")
const bootstrapMessage = `No versions specified.
To bootstrap the code generation, run:
go run gen.go --versions=4.1.0,5.0.0,6.0.0,6.1.0,6.2.0,6.3.0,7.0.0
and ensure that the latest versions are included by checking:
https://www.unicode.org/Public/`
func getVersions() []string {
if *versionList == "" {
log.Fatal(bootstrapMessage)
}
c := collate.New(language.Und, collate.Numeric)
versions := strings.Split(*versionList, ",")
c.SortStrings(versions)
// Ensure that at least the current version is included.
for _, v := range versions {
if v == gen.UnicodeVersion() {
return versions
}
}
versions = append(versions, gen.UnicodeVersion())
c.SortStrings(versions)
return versions
}
func main() {
gen.Init()
versions := getVersions()
w := &bytes.Buffer{}
fmt.Fprintf(w, "//go:generate go run gen.go --versions=%s\n\n", strings.Join(versions, ","))
fmt.Fprintf(w, "import \"unicode\"\n\n")
vstr := func(s string) string { return strings.Replace(s, ".", "_", -1) }
fmt.Fprintf(w, "var assigned = map[string]*unicode.RangeTable{\n")
for _, v := range versions {
fmt.Fprintf(w, "\t%q: assigned%s,\n", v, vstr(v))
}
fmt.Fprintf(w, "}\n\n")
var size int
for _, v := range versions {
assigned := []rune{}
r := gen.Open("https://www.unicode.org/Public/", "", v+"/ucd/UnicodeData.txt")
ucd.Parse(r, func(p *ucd.Parser) {
assigned = append(assigned, p.Rune(0))
})
rt := rangetable.New(assigned...)
sz := int(reflect.TypeOf(unicode.RangeTable{}).Size())
sz += int(reflect.TypeOf(unicode.Range16{}).Size()) * len(rt.R16)
sz += int(reflect.TypeOf(unicode.Range32{}).Size()) * len(rt.R32)
fmt.Fprintf(w, "// size %d bytes (%d KiB)\n", sz, sz/1024)
fmt.Fprintf(w, "var assigned%s = ", vstr(v))
print(w, rt)
size += sz
}
fmt.Fprintf(w, "// Total size %d bytes (%d KiB)\n", size, size/1024)
gen.WriteVersionedGoFile("tables.go", "rangetable", w.Bytes())
}
func print(w io.Writer, rt *unicode.RangeTable) {
fmt.Fprintln(w, "&unicode.RangeTable{")
fmt.Fprintln(w, "\tR16: []unicode.Range16{")
for _, r := range rt.R16 {
fmt.Fprintf(w, "\t\t{%#04x, %#04x, %d},\n", r.Lo, r.Hi, r.Stride)
}
fmt.Fprintln(w, "\t},")
fmt.Fprintln(w, "\tR32: []unicode.Range32{")
for _, r := range rt.R32 {
fmt.Fprintf(w, "\t\t{%#08x, %#08x, %d},\n", r.Lo, r.Hi, r.Stride)
}
fmt.Fprintln(w, "\t},")
fmt.Fprintf(w, "\tLatinOffset: %d,\n", rt.LatinOffset)
fmt.Fprintf(w, "}\n\n")
}

260
vendor/golang.org/x/text/unicode/rangetable/merge.go generated vendored Normal file
View file

@ -0,0 +1,260 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package rangetable
import (
"unicode"
)
// atEnd is used to mark a completed iteration.
const atEnd = unicode.MaxRune + 1
// Merge returns a new RangeTable that is the union of the given tables.
// It can also be used to compact user-created RangeTables. The entries in
// R16 and R32 for any given RangeTable should be sorted and non-overlapping.
//
// A lookup in the resulting table can be several times faster than using In
// directly on the ranges. Merge is an expensive operation, however, and only
// makes sense if one intends to use the result for more than a couple of
// hundred lookups.
func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable {
rt := &unicode.RangeTable{}
if len(ranges) == 0 {
return rt
}
iter := tablesIter(make([]tableIndex, len(ranges)))
for i, t := range ranges {
iter[i] = tableIndex{t, 0, atEnd}
if len(t.R16) > 0 {
iter[i].next = rune(t.R16[0].Lo)
}
}
if r0 := iter.next16(); r0.Stride != 0 {
for {
r1 := iter.next16()
if r1.Stride == 0 {
rt.R16 = append(rt.R16, r0)
break
}
stride := r1.Lo - r0.Hi
if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
// Fully merge the next range into the previous one.
r0.Hi, r0.Stride = r1.Hi, stride
continue
} else if stride == r0.Stride {
// Move the first element of r1 to r0. This may eliminate an
// entry.
r0.Hi = r1.Lo
r0.Stride = stride
r1.Lo = r1.Lo + r1.Stride
if r1.Lo > r1.Hi {
continue
}
}
rt.R16 = append(rt.R16, r0)
r0 = r1
}
}
for i, t := range ranges {
iter[i] = tableIndex{t, 0, atEnd}
if len(t.R32) > 0 {
iter[i].next = rune(t.R32[0].Lo)
}
}
if r0 := iter.next32(); r0.Stride != 0 {
for {
r1 := iter.next32()
if r1.Stride == 0 {
rt.R32 = append(rt.R32, r0)
break
}
stride := r1.Lo - r0.Hi
if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
// Fully merge the next range into the previous one.
r0.Hi, r0.Stride = r1.Hi, stride
continue
} else if stride == r0.Stride {
// Move the first element of r1 to r0. This may eliminate an
// entry.
r0.Hi = r1.Lo
r1.Lo = r1.Lo + r1.Stride
if r1.Lo > r1.Hi {
continue
}
}
rt.R32 = append(rt.R32, r0)
r0 = r1
}
}
for i := 0; i < len(rt.R16) && rt.R16[i].Hi <= unicode.MaxLatin1; i++ {
rt.LatinOffset = i + 1
}
return rt
}
type tableIndex struct {
t *unicode.RangeTable
p uint32
next rune
}
type tablesIter []tableIndex
// sortIter does an insertion sort using the next field of tableIndex. Insertion
// sort is a good sorting algorithm for this case.
func sortIter(t []tableIndex) {
for i := range t {
for j := i; j > 0 && t[j-1].next > t[j].next; j-- {
t[j], t[j-1] = t[j-1], t[j]
}
}
}
// next16 finds the ranged to be added to the table. If ranges overlap between
// multiple tables it clips the result to a non-overlapping range if the
// elements are not fully subsumed. It returns a zero range if there are no more
// ranges.
func (ti tablesIter) next16() unicode.Range16 {
sortIter(ti)
t0 := ti[0]
if t0.next == atEnd {
return unicode.Range16{}
}
r0 := t0.t.R16[t0.p]
r0.Lo = uint16(t0.next)
// We restrict the Hi of the current range if it overlaps with another range.
for i := range ti {
tn := ti[i]
// Since our tableIndices are sorted by next, we can break if the there
// is no overlap. The first value of a next range can always be merged
// into the current one, so we can break in case of equality as well.
if rune(r0.Hi) <= tn.next {
break
}
rn := tn.t.R16[tn.p]
rn.Lo = uint16(tn.next)
// Limit r0.Hi based on next ranges in list, but allow it to overlap
// with ranges as long as it subsumes it.
m := (rn.Lo - r0.Lo) % r0.Stride
if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
// Overlap, take the min of the two Hi values: for simplicity's sake
// we only process one range at a time.
if r0.Hi > rn.Hi {
r0.Hi = rn.Hi
}
} else {
// Not a compatible stride. Set to the last possible value before
// rn.Lo, but ensure there is at least one value.
if x := rn.Lo - m; r0.Lo <= x {
r0.Hi = x
}
break
}
}
// Update the next values for each table.
for i := range ti {
tn := &ti[i]
if rune(r0.Hi) < tn.next {
break
}
rn := tn.t.R16[tn.p]
stride := rune(rn.Stride)
tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
if rune(rn.Hi) < tn.next {
if tn.p++; int(tn.p) == len(tn.t.R16) {
tn.next = atEnd
} else {
tn.next = rune(tn.t.R16[tn.p].Lo)
}
}
}
if r0.Lo == r0.Hi {
r0.Stride = 1
}
return r0
}
// next32 finds the ranged to be added to the table. If ranges overlap between
// multiple tables it clips the result to a non-overlapping range if the
// elements are not fully subsumed. It returns a zero range if there are no more
// ranges.
func (ti tablesIter) next32() unicode.Range32 {
sortIter(ti)
t0 := ti[0]
if t0.next == atEnd {
return unicode.Range32{}
}
r0 := t0.t.R32[t0.p]
r0.Lo = uint32(t0.next)
// We restrict the Hi of the current range if it overlaps with another range.
for i := range ti {
tn := ti[i]
// Since our tableIndices are sorted by next, we can break if the there
// is no overlap. The first value of a next range can always be merged
// into the current one, so we can break in case of equality as well.
if rune(r0.Hi) <= tn.next {
break
}
rn := tn.t.R32[tn.p]
rn.Lo = uint32(tn.next)
// Limit r0.Hi based on next ranges in list, but allow it to overlap
// with ranges as long as it subsumes it.
m := (rn.Lo - r0.Lo) % r0.Stride
if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
// Overlap, take the min of the two Hi values: for simplicity's sake
// we only process one range at a time.
if r0.Hi > rn.Hi {
r0.Hi = rn.Hi
}
} else {
// Not a compatible stride. Set to the last possible value before
// rn.Lo, but ensure there is at least one value.
if x := rn.Lo - m; r0.Lo <= x {
r0.Hi = x
}
break
}
}
// Update the next values for each table.
for i := range ti {
tn := &ti[i]
if rune(r0.Hi) < tn.next {
break
}
rn := tn.t.R32[tn.p]
stride := rune(rn.Stride)
tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
if rune(rn.Hi) < tn.next {
if tn.p++; int(tn.p) == len(tn.t.R32) {
tn.next = atEnd
} else {
tn.next = rune(tn.t.R32[tn.p].Lo)
}
}
}
if r0.Lo == r0.Hi {
r0.Stride = 1
}
return r0
}

View file

@ -0,0 +1,70 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package rangetable provides utilities for creating and inspecting
// unicode.RangeTables.
package rangetable
import (
"sort"
"unicode"
)
// New creates a RangeTable from the given runes, which may contain duplicates.
func New(r ...rune) *unicode.RangeTable {
if len(r) == 0 {
return &unicode.RangeTable{}
}
sort.Sort(byRune(r))
// Remove duplicates.
k := 1
for i := 1; i < len(r); i++ {
if r[k-1] != r[i] {
r[k] = r[i]
k++
}
}
var rt unicode.RangeTable
for _, r := range r[:k] {
if r <= 0xFFFF {
rt.R16 = append(rt.R16, unicode.Range16{Lo: uint16(r), Hi: uint16(r), Stride: 1})
} else {
rt.R32 = append(rt.R32, unicode.Range32{Lo: uint32(r), Hi: uint32(r), Stride: 1})
}
}
// Optimize RangeTable.
return Merge(&rt)
}
type byRune []rune
func (r byRune) Len() int { return len(r) }
func (r byRune) Swap(i, j int) { r[i], r[j] = r[j], r[i] }
func (r byRune) Less(i, j int) bool { return r[i] < r[j] }
// Visit visits all runes in the given RangeTable in order, calling fn for each.
func Visit(rt *unicode.RangeTable, fn func(rune)) {
for _, r16 := range rt.R16 {
for r := rune(r16.Lo); r <= rune(r16.Hi); r += rune(r16.Stride) {
fn(r)
}
}
for _, r32 := range rt.R32 {
for r := rune(r32.Lo); r <= rune(r32.Hi); r += rune(r32.Stride) {
fn(r)
}
}
}
// Assigned returns a RangeTable with all assigned code points for a given
// Unicode version. This includes graphic, format, control, and private-use
// characters. It returns nil if the data for the given version is not
// available.
func Assigned(version string) *unicode.RangeTable {
return assigned[version]
}

Some files were not shown because too many files have changed in this diff Show more