Compare commits

...

3 Commits

Author SHA1 Message Date
Jeff Carr 502e0854e7 add some thoughts 2025-09-12 10:13:10 -05:00
Jeff Carr 4d61cadd81 fixes UTF8 from really old git commits in uboot 2025-06-29 02:49:00 -05:00
Jeff Carr 2ee8d4947b playing around with forge patching 2025-06-04 06:30:03 -05:00
3 changed files with 161 additions and 0 deletions

View File

@ -1,6 +1,21 @@
This package should be a Go primitive package and
therefore have a minimal go.sum file.
TODO: Appeal to have google.golang.org/protobuf moved into Go itself
# protobuf definition files for git-bug # protobuf definition files for git-bug
This is to make an easy way to parse the bugs from This is to make an easy way to parse the bugs from
the federated git-bug system. Search can be implemented the federated git-bug system. Search can be implemented
on top of this protobuf definition instead of being mixed on top of this protobuf definition instead of being mixed
in with the database code. in with the database code.
#####
The above was the original intent of this package and
it probably still will be, but this is also a good
name for "bugs" in protobufs themselves, so I've
moved things like the UTF-8 validation routines here.
In general, maybe it'll become more of a toolbox of common,
useful protobuf routines.

145
SanitizeUTF8.go Normal file
View File

@ -0,0 +1,145 @@
// Copyright 2017-2025 WIT.COM Inc. All rights reserved.
// Use of this source code is governed by the GPL 3.0
package bugpb
import (
"bytes"
"fmt"
"io"
"reflect"
"unicode/utf8"
"golang.org/x/text/encoding/charmap"
"google.golang.org/protobuf/proto"
)
// ValidateProtoUTF8 inspects msg by reflection and reports whether the
// strings reachable from it are valid UTF-8.
//
// It returns nil when nothing invalid is found; otherwise it returns the
// error produced for the first offending string, which names the path to
// the value and quotes its contents. msg itself is not modified.
func ValidateProtoUTF8(msg proto.Message) error {
	root := reflect.ValueOf(msg)
	return validateValue(root, "")
}
// validateValue walks val recursively and returns an error describing the
// first string it finds that is not valid UTF-8, or nil if there is none.
//
// path is the human-readable location of val inside the root value; it is
// extended with ".Field", "[index]" or "[key]" while descending and is only
// used to build the error message.
//
// Traversal rules:
//   - nil pointers, nil interfaces and invalid values are treated as valid;
//   - pointers and interfaces are followed to the value they hold, so strings
//     stored behind interface-typed fields (for example protobuf oneof
//     wrappers) are checked as well;
//   - only exported struct fields are visited, which matches sanitizeValue
//     (it can only repair settable fields) and keeps the walk out of
//     unexported bookkeeping fields;
//     NOTE(review): for generated protobuf messages this is presumed to be
//     the runtime's internal state - confirm against the generated code;
//   - []byte values are skipped because they carry arbitrary binary data;
//   - for maps both the values and, when they are strings, the keys are
//     checked.
func validateValue(val reflect.Value, path string) error {
	if !val.IsValid() {
		return nil
	}

	switch val.Kind() {
	case reflect.Ptr, reflect.Interface:
		if val.IsNil() {
			return nil
		}
		return validateValue(val.Elem(), path)

	case reflect.Struct:
		typ := val.Type()
		for i := 0; i < val.NumField(); i++ {
			fieldType := typ.Field(i)
			// PkgPath is only set for unexported fields.
			if fieldType.PkgPath != "" {
				continue
			}
			fieldPath := fmt.Sprintf("%s.%s", path, fieldType.Name)
			if err := validateValue(val.Field(i), fieldPath); err != nil {
				return err
			}
		}

	case reflect.String:
		s := val.String()
		if !utf8.ValidString(s) {
			return fmt.Errorf("invalid UTF-8 string at %s: %q", path, s)
		}

	case reflect.Slice:
		if val.Type().Elem().Kind() == reflect.Uint8 {
			return nil // skip []byte
		}
		for i := 0; i < val.Len(); i++ {
			if err := validateValue(val.Index(i), fmt.Sprintf("%s[%d]", path, i)); err != nil {
				return err
			}
		}

	case reflect.Map:
		for _, key := range val.MapKeys() {
			// String keys must be valid UTF-8 too, not just the values.
			if key.Kind() == reflect.String && !utf8.ValidString(key.String()) {
				return fmt.Errorf("invalid UTF-8 map key at %s: %q", path, key.String())
			}
			valItem := val.MapIndex(key)
			if err := validateValue(valItem, fmt.Sprintf("%s[%v]", path, key)); err != nil {
				return err
			}
		}
	}
	return nil
}
// SanitizeProtoUTF8 walks msg by reflection and rewrites, in place, the
// strings it finds that are not valid UTF-8.
//
// The actual traversal and conversion are done by sanitizeValue; any error
// it reports is returned unchanged. msg is modified directly, so callers
// that need the original contents must copy the message first.
func SanitizeProtoUTF8(msg proto.Message) error {
	root := reflect.ValueOf(msg)
	return sanitizeValue(root, "")
}
// sanitizeValue walks val recursively and replaces every string that is not
// valid UTF-8 with the result of latin1ToUTF8, modifying the value in place.
//
// path is the human-readable location of val inside the root value; it is
// extended with ".Field", "[index]" or "[key]" while descending and is only
// used in error messages.
//
// Traversal rules:
//   - nil pointers, nil interfaces and invalid values are left alone;
//   - pointers are followed; interfaces are followed as well, so strings
//     stored behind interface-typed fields (for example protobuf oneof
//     wrappers) are repaired too;
//   - struct fields that cannot be set (unexported fields) are skipped;
//   - []byte values are skipped because they carry arbitrary binary data;
//   - map values are sanitized through a settable copy and written back;
//     invalid string keys are converted as well, and the entry is moved to
//     the converted key.
//
// An error is returned when a conversion fails, when an invalid string is
// reached through a value that cannot be modified, or when converting a map
// key would collide with a key that already exists in the map.
func sanitizeValue(val reflect.Value, path string) error {
	if !val.IsValid() {
		return nil
	}

	switch val.Kind() {
	case reflect.Ptr:
		if val.IsNil() {
			return nil
		}
		return sanitizeValue(val.Elem(), path)

	case reflect.Interface:
		if val.IsNil() {
			return nil
		}
		inner := val.Elem()
		if inner.Kind() == reflect.Ptr {
			// The pointee is addressable, so it can be fixed in place.
			return sanitizeValue(inner, path)
		}
		if !val.CanSet() {
			return nil
		}
		// A value stored directly in an interface is not settable: work on
		// a copy and store the copy back into the interface.
		fixed := reflect.New(inner.Type()).Elem()
		fixed.Set(inner)
		if err := sanitizeValue(fixed, path); err != nil {
			return err
		}
		val.Set(fixed)

	case reflect.Struct:
		typ := val.Type()
		for i := 0; i < val.NumField(); i++ {
			field := val.Field(i)
			if !field.CanSet() {
				continue
			}
			if err := sanitizeValue(field, fmt.Sprintf("%s.%s", path, typ.Field(i).Name)); err != nil {
				return err
			}
		}

	case reflect.String:
		s := val.String()
		if utf8.ValidString(s) {
			return nil
		}
		if !val.CanSet() {
			// Report instead of letting SetString panic.
			return fmt.Errorf("cannot sanitize %s: value is not settable", path)
		}
		utf8Str, err := latin1ToUTF8(s)
		if err != nil {
			return fmt.Errorf("failed to convert %s to UTF-8: %w", path, err)
		}
		val.SetString(utf8Str)

	case reflect.Slice:
		if val.Type().Elem().Kind() == reflect.Uint8 {
			return nil // skip []byte
		}
		for i := 0; i < val.Len(); i++ {
			if err := sanitizeValue(val.Index(i), fmt.Sprintf("%s[%d]", path, i)); err != nil {
				return err
			}
		}

	case reflect.Map:
		// MapKeys returns a snapshot, so the map may be modified while
		// iterating over it.
		for _, key := range val.MapKeys() {
			valItem := val.MapIndex(key)
			// Map elements are not addressable: sanitize a settable copy
			// and store it back afterwards.
			newItem := reflect.New(valItem.Type()).Elem()
			newItem.Set(valItem)
			if err := sanitizeValue(newItem, fmt.Sprintf("%s[%v]", path, key)); err != nil {
				return err
			}

			newKey := key
			if key.Kind() == reflect.String && !utf8.ValidString(key.String()) {
				keyStr, err := latin1ToUTF8(key.String())
				if err != nil {
					return fmt.Errorf("failed to convert map key at %s to UTF-8: %w", path, err)
				}
				newKey = reflect.New(key.Type()).Elem()
				newKey.SetString(keyStr)
				// Refuse to silently overwrite an unrelated entry.
				if val.MapIndex(newKey).IsValid() {
					return fmt.Errorf("sanitized map key at %s collides with existing key %q", path, keyStr)
				}
				// Setting the zero Value deletes the old, invalid key.
				val.SetMapIndex(key, reflect.Value{})
			}
			val.SetMapIndex(newKey, newItem)
		}
	}
	return nil
}
// latin1ToUTF8 treats the bytes of input as ISO-8859-1 (Latin-1) text and
// returns the same text encoded as UTF-8.
//
// The bytes are streamed through the charmap.ISO8859_1 decoder; if reading
// the decoded stream fails, an empty string and the read error are returned.
func latin1ToUTF8(input string) (string, error) {
	src := bytes.NewReader([]byte(input))
	decoder := charmap.ISO8859_1.NewDecoder()

	decoded, err := io.ReadAll(decoder.Reader(src))
	if err != nil {
		return "", err
	}
	return string(decoded), nil
}

View File

@ -10,6 +10,7 @@ import "google/protobuf/any.proto"; // Import 'Any'
message Bug { message Bug {
string uuid = 1; string uuid = 1;
string name = 2; string name = 2;
string subject = 3;
} }
message Bugs { // `autogenpb:marshal` message Bugs { // `autogenpb:marshal`