Compare commits
No commits in common. "master" and "v0.0.3" have entirely different histories.
15
README.md
15
README.md
|
@ -1,21 +1,6 @@
|
||||||
This package should be a Go primitive package and
|
|
||||||
therefore have a minimum go.sum file.
|
|
||||||
|
|
||||||
TODO: Appeal to have google.golang.org/protobuf moved into Go itself
|
|
||||||
|
|
||||||
# protobuf definition files for git-bug
|
# protobuf definition files for git-bug
|
||||||
|
|
||||||
This is to make an easy way to parse the bugs from
|
This is to make an easy way to parse the bugs from
|
||||||
the federated git-bug system. Search can be implemented
|
the federated git-bug system. Search can be implemented
|
||||||
on top of this protobuf definition instead of being mixed
|
on top of this protobuf definition instead of being mixed
|
||||||
in with the database code.
|
in with the database code.
|
||||||
|
|
||||||
#####
|
|
||||||
|
|
||||||
The above was the original intent of this package and
|
|
||||||
probably will still be that way, but this is a good
|
|
||||||
name for "bugs" in protobufs themselves and I've
|
|
||||||
moved things like the UTF-8 validation routines here.
|
|
||||||
|
|
||||||
In general, maybe it'll be more of a toolbox of common
|
|
||||||
useful protobuf routines.
|
|
||||||
|
|
145
SanitizeUTF8.go
145
SanitizeUTF8.go
|
@ -1,145 +0,0 @@
|
||||||
// Copyright 2017-2025 WIT.COM Inc. All rights reserved.
|
|
||||||
// Use of this source code is governed by the GPL 3.0
|
|
||||||
|
|
||||||
package bugpb
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"reflect"
|
|
||||||
"unicode/utf8"
|
|
||||||
|
|
||||||
"golang.org/x/text/encoding/charmap"
|
|
||||||
"google.golang.org/protobuf/proto"
|
|
||||||
)
|
|
||||||
|
|
||||||
// ValidateProtoUTF8 checks all string fields in a proto.Message recursively.
|
|
||||||
func ValidateProtoUTF8(msg proto.Message) error {
|
|
||||||
return validateValue(reflect.ValueOf(msg), "")
|
|
||||||
}
|
|
||||||
|
|
||||||
// validateValue recursively walks val and returns an error describing the
// first string that is not valid UTF-8, or nil when none is found.
//
// path is the location of val relative to the root (for example
// ".Comments[2].Text") and is used only to build the error message.
//
// Traversal rules:
//   - nil pointers, nil interfaces and invalid values are accepted as-is;
//   - pointers and interface values are followed, so strings stored behind
//     an interface-typed field (how protobuf oneof wrappers are represented)
//     are checked as well;
//   - only exported struct fields are visited, which keeps the walk out of
//     unexported bookkeeping fields and matches sanitizeValue, which can
//     only repair settable (exported) fields;
//   - []byte is skipped because bytes fields may legitimately hold non-UTF-8
//     data;
//   - both the keys and the values of maps are checked.
func validateValue(val reflect.Value, path string) error {
	if !val.IsValid() {
		return nil
	}

	switch val.Kind() {
	case reflect.Ptr, reflect.Interface:
		if val.IsNil() {
			return nil
		}
		return validateValue(val.Elem(), path)

	case reflect.Struct:
		typ := val.Type()
		for i := 0; i < val.NumField(); i++ {
			fieldType := typ.Field(i)
			// A non-empty PkgPath marks an unexported field.
			if fieldType.PkgPath != "" {
				continue
			}
			if err := validateValue(val.Field(i), path+"."+fieldType.Name); err != nil {
				return err
			}
		}

	case reflect.String:
		if s := val.String(); !utf8.ValidString(s) {
			return fmt.Errorf("invalid UTF-8 string at %s: %q", path, s)
		}

	case reflect.Slice:
		if val.Type().Elem().Kind() == reflect.Uint8 {
			return nil // skip []byte
		}
		for i := 0; i < val.Len(); i++ {
			if err := validateValue(val.Index(i), fmt.Sprintf("%s[%d]", path, i)); err != nil {
				return err
			}
		}

	case reflect.Map:
		for _, key := range val.MapKeys() {
			// String keys are serialized as strings too, so they must be valid.
			if key.Kind() == reflect.String && !utf8.ValidString(key.String()) {
				return fmt.Errorf("invalid UTF-8 map key at %s: %q", path, key.String())
			}
			if err := validateValue(val.MapIndex(key), fmt.Sprintf("%s[%v]", path, key)); err != nil {
				return err
			}
		}
	}

	return nil
}
|
|
||||||
|
|
||||||
// SanitizeProtoUTF8 fixes all invalid UTF-8 strings in a proto.Message recursively.
|
|
||||||
func SanitizeProtoUTF8(msg proto.Message) error {
|
|
||||||
return sanitizeValue(reflect.ValueOf(msg), "")
|
|
||||||
}
|
|
||||||
|
|
||||||
func sanitizeValue(val reflect.Value, path string) error {
|
|
||||||
if !val.IsValid() {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if val.Kind() == reflect.Ptr {
|
|
||||||
if val.IsNil() {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return sanitizeValue(val.Elem(), path)
|
|
||||||
}
|
|
||||||
|
|
||||||
switch val.Kind() {
|
|
||||||
case reflect.Struct:
|
|
||||||
for i := 0; i < val.NumField(); i++ {
|
|
||||||
field := val.Field(i)
|
|
||||||
fieldType := val.Type().Field(i)
|
|
||||||
if !field.CanSet() {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if err := sanitizeValue(field, fmt.Sprintf("%s.%s", path, fieldType.Name)); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
case reflect.String:
|
|
||||||
s := val.String()
|
|
||||||
if !utf8.ValidString(s) {
|
|
||||||
utf8Str, err := latin1ToUTF8(s)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to convert %s to UTF-8: %v", path, err)
|
|
||||||
}
|
|
||||||
val.SetString(utf8Str)
|
|
||||||
}
|
|
||||||
|
|
||||||
case reflect.Slice:
|
|
||||||
if val.Type().Elem().Kind() == reflect.Uint8 {
|
|
||||||
return nil // skip []byte
|
|
||||||
}
|
|
||||||
for i := 0; i < val.Len(); i++ {
|
|
||||||
if err := sanitizeValue(val.Index(i), fmt.Sprintf("%s[%d]", path, i)); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
case reflect.Map:
|
|
||||||
for _, key := range val.MapKeys() {
|
|
||||||
valItem := val.MapIndex(key)
|
|
||||||
newItem := reflect.New(valItem.Type()).Elem()
|
|
||||||
newItem.Set(valItem)
|
|
||||||
if err := sanitizeValue(newItem, fmt.Sprintf("%s[%v]", path, key)); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
val.SetMapIndex(key, newItem)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func latin1ToUTF8(input string) (string, error) {
|
|
||||||
reader := charmap.ISO8859_1.NewDecoder().Reader(bytes.NewReader([]byte(input)))
|
|
||||||
result, err := io.ReadAll(reader)
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
return string(result), nil
|
|
||||||
}
|
|
Loading…
Reference in New Issue