go-ethereum/vendor/github.com/naoina/go-stringutil/strings.go

321 lines
8.0 KiB
Go

package stringutil
import (
"sync"
"unicode"
"unicode/utf8"
)
var (
mu sync.Mutex
// Based on https://github.com/golang/lint/blob/32a87160691b3c96046c0c678fe57c5bef761456/lint.go#L702
commonInitialismMap = map[string]struct{}{
"API": struct{}{},
"ASCII": struct{}{},
"CPU": struct{}{},
"CSRF": struct{}{},
"CSS": struct{}{},
"DNS": struct{}{},
"EOF": struct{}{},
"GUID": struct{}{},
"HTML": struct{}{},
"HTTP": struct{}{},
"HTTPS": struct{}{},
"ID": struct{}{},
"IP": struct{}{},
"JSON": struct{}{},
"LHS": struct{}{},
"QPS": struct{}{},
"RAM": struct{}{},
"RHS": struct{}{},
"RPC": struct{}{},
"SLA": struct{}{},
"SMTP": struct{}{},
"SQL": struct{}{},
"SSH": struct{}{},
"TCP": struct{}{},
"TLS": struct{}{},
"TTL": struct{}{},
"UDP": struct{}{},
"UI": struct{}{},
"UID": struct{}{},
"UUID": struct{}{},
"URI": struct{}{},
"URL": struct{}{},
"UTF8": struct{}{},
"VM": struct{}{},
"XML": struct{}{},
"XSRF": struct{}{},
"XSS": struct{}{},
}
commonInitialisms = keys(commonInitialismMap)
commonInitialism = mustDoubleArray(newDoubleArray(commonInitialisms))
longestLen = longestLength(commonInitialisms)
shortestLen = shortestLength(commonInitialisms, longestLen)
)
// ToUpperCamelCase returns a copy of the string s with all Unicode letters mapped to their camel case.
// It will convert to upper case previous letter of '_' and first letter, and remove letter of '_'.
func ToUpperCamelCase(s string) string {
if s == "" {
return ""
}
upper := true
start := 0
result := make([]byte, 0, len(s))
var runeBuf [utf8.UTFMax]byte
var initialism []byte
for _, c := range s {
if c == '_' {
upper = true
candidate := string(result[start:])
initialism = initialism[:0]
for _, r := range candidate {
if r < utf8.RuneSelf {
initialism = append(initialism, toUpperASCII(byte(r)))
} else {
n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(r))
initialism = append(initialism, runeBuf[:n]...)
}
}
if length := commonInitialism.LookupByBytes(initialism); length > 0 {
result = append(result[:start], initialism...)
}
start = len(result)
continue
}
if upper {
if c < utf8.RuneSelf {
result = append(result, toUpperASCII(byte(c)))
} else {
n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(c))
result = append(result, runeBuf[:n]...)
}
upper = false
continue
}
if c < utf8.RuneSelf {
result = append(result, byte(c))
} else {
n := utf8.EncodeRune(runeBuf[:], c)
result = append(result, runeBuf[:n]...)
}
}
candidate := string(result[start:])
initialism = initialism[:0]
for _, r := range candidate {
if r < utf8.RuneSelf {
initialism = append(initialism, toUpperASCII(byte(r)))
} else {
n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(r))
initialism = append(initialism, runeBuf[:n]...)
}
}
if length := commonInitialism.LookupByBytes(initialism); length > 0 {
result = append(result[:start], initialism...)
}
return string(result)
}
// ToUpperCamelCaseASCII is similar to ToUpperCamelCase, but optimized for
// only the ASCII characters.
// ToUpperCamelCaseASCII is faster than ToUpperCamelCase, but doesn't work if
// contains non-ASCII characters.
func ToUpperCamelCaseASCII(s string) string {
if s == "" {
return ""
}
upper := true
start := 0
result := make([]byte, 0, len(s))
var initialism []byte
for i := 0; i < len(s); i++ {
c := s[i]
if c == '_' {
upper = true
candidate := result[start:]
initialism = initialism[:0]
for _, b := range candidate {
initialism = append(initialism, toUpperASCII(b))
}
if length := commonInitialism.LookupByBytes(initialism); length > 0 {
result = append(result[:start], initialism...)
}
start = len(result)
continue
}
if upper {
result = append(result, toUpperASCII(c))
upper = false
continue
}
result = append(result, c)
}
candidate := result[start:]
initialism = initialism[:0]
for _, b := range candidate {
initialism = append(initialism, toUpperASCII(b))
}
if length := commonInitialism.LookupByBytes(initialism); length > 0 {
result = append(result[:start], initialism...)
}
return string(result)
}
// ToSnakeCase returns a copy of the string s with all Unicode letters mapped to their snake case.
// It will insert letter of '_' at position of previous letter of uppercase and all
// letters convert to lower case.
// ToSnakeCase does not insert '_' letter into a common initialism word like ID, URL and so on.
func ToSnakeCase(s string) string {
if s == "" {
return ""
}
result := make([]byte, 0, len(s))
var runeBuf [utf8.UTFMax]byte
var j, skipCount int
for i, c := range s {
if i < skipCount {
continue
}
if unicode.IsUpper(c) {
if i != 0 {
result = append(result, '_')
}
next := nextIndex(j, len(s))
if length := commonInitialism.Lookup(s[j:next]); length > 0 {
for _, r := range s[j : j+length] {
if r < utf8.RuneSelf {
result = append(result, toLowerASCII(byte(r)))
} else {
n := utf8.EncodeRune(runeBuf[:], unicode.ToLower(r))
result = append(result, runeBuf[:n]...)
}
}
j += length - 1
skipCount = i + length
continue
}
}
if c < utf8.RuneSelf {
result = append(result, toLowerASCII(byte(c)))
} else {
n := utf8.EncodeRune(runeBuf[:], unicode.ToLower(c))
result = append(result, runeBuf[:n]...)
}
j++
}
return string(result)
}
// ToSnakeCaseASCII is similar to ToSnakeCase, but optimized for only the ASCII
// characters.
// ToSnakeCaseASCII is faster than ToSnakeCase, but doesn't work correctly if
// contains non-ASCII characters.
func ToSnakeCaseASCII(s string) string {
if s == "" {
return ""
}
result := make([]byte, 0, len(s))
for i := 0; i < len(s); i++ {
c := s[i]
if isUpperASCII(c) {
if i != 0 {
result = append(result, '_')
}
if k := i + shortestLen - 1; k < len(s) && isUpperASCII(s[k]) {
if length := commonInitialism.Lookup(s[i:nextIndex(i, len(s))]); length > 0 {
for j, buf := 0, s[i:i+length]; j < len(buf); j++ {
result = append(result, toLowerASCII(buf[j]))
}
i += length - 1
continue
}
}
}
result = append(result, toLowerASCII(c))
}
return string(result)
}
// AddCommonInitialism adds ss to list of common initialisms.
func AddCommonInitialism(ss ...string) {
mu.Lock()
defer mu.Unlock()
for _, s := range ss {
commonInitialismMap[s] = struct{}{}
}
commonInitialisms = keys(commonInitialismMap)
commonInitialism = mustDoubleArray(newDoubleArray(commonInitialisms))
longestLen = longestLength(commonInitialisms)
shortestLen = shortestLength(commonInitialisms, longestLen)
}
// DelCommonInitialism deletes ss from list of common initialisms.
func DelCommonInitialism(ss ...string) {
mu.Lock()
defer mu.Unlock()
for _, s := range ss {
delete(commonInitialismMap, s)
}
commonInitialisms = keys(commonInitialismMap)
commonInitialism = mustDoubleArray(newDoubleArray(commonInitialisms))
longestLen = longestLength(commonInitialisms)
shortestLen = shortestLength(commonInitialisms, longestLen)
}
func isUpperASCII(c byte) bool {
return 'A' <= c && c <= 'Z'
}
func isLowerASCII(c byte) bool {
return 'a' <= c && c <= 'z'
}
func toUpperASCII(c byte) byte {
if isLowerASCII(c) {
return c - ('a' - 'A')
}
return c
}
func toLowerASCII(c byte) byte {
if isUpperASCII(c) {
return c + 'a' - 'A'
}
return c
}
func nextIndex(i, maxlen int) int {
if n := i + longestLen; n < maxlen {
return n
}
return maxlen
}
func keys(m map[string]struct{}) []string {
result := make([]string, 0, len(m))
for k := range m {
result = append(result, k)
}
return result
}
func shortestLength(strs []string, shortest int) int {
for _, s := range strs {
if candidate := utf8.RuneCountInString(s); candidate < shortest {
shortest = candidate
}
}
return shortest
}
func longestLength(strs []string) (longest int) {
for _, s := range strs {
if candidate := utf8.RuneCountInString(s); candidate > longest {
longest = candidate
}
}
return longest
}