Compare commits

..

4 Commits

Author SHA1 Message Date
Jeff Carr 502e0854e7 add some thoughts 2025-09-12 10:13:10 -05:00
Jeff Carr 4d61cadd81 fixes UTF8 from really old git commits in uboot 2025-06-29 02:49:00 -05:00
Jeff Carr 2ee8d4947b playing around with forge patching 2025-06-04 06:30:03 -05:00
Jeff Carr 3b0c528b68 misc 2025-05-23 18:15:42 -05:00
3 changed files with 165 additions and 1 deletions

View File

@ -1,3 +1,21 @@
This package should be a Go primitive package and
therefore have a minimal go.sum file.
TODO: Appeal to have google.golang.org/protobuf moved into Go itself.
# protobuf definition files for git-bug # protobuf definition files for git-bug
This is an experiment to see if this will be useful This is to make an easy way to parse the bugs from
the federated git-bug system. Search can be implemented
on top of this protobuf definition instead of being mixed
in with the database code.
#####
The above was the original intent of this package, and
it probably still will be. However, this is also a good
name for "bugs" in protobufs themselves, so I've
moved helpers such as the UTF-8 validation routines here.
In general, it may become more of a toolbox of common,
useful protobuf routines.

145
SanitizeUTF8.go Normal file
View File

@ -0,0 +1,145 @@
// Copyright 2017-2025 WIT.COM Inc. All rights reserved.
// Use of this source code is governed by the GPL 3.0
package bugpb
import (
"bytes"
"fmt"
"io"
"reflect"
"unicode/utf8"
"golang.org/x/text/encoding/charmap"
"google.golang.org/protobuf/proto"
)
// ValidateProtoUTF8 reports the first string reachable from msg that is not
// valid UTF-8. It reflects over msg and hands the resulting value to
// validateValue with an empty starting path; a nil error means no invalid
// string was found.
func ValidateProtoUTF8(msg proto.Message) error {
	root := reflect.ValueOf(msg)
	return validateValue(root, "")
}
// validateValue walks val recursively and returns an error for the first
// string it finds that is not valid UTF-8. It returns nil when every string
// reached is valid.
//
// path is a human-readable location prefix used only in the error message;
// struct fields append ".Name", slice elements "[i]" and map values "[key]".
//
// Traversal rules:
//   - invalid values and nil pointers/interfaces are ignored;
//   - pointers and interfaces are followed to the value they hold
//     (interface-typed fields are presumably how generated code exposes
//     oneof members - confirm against the generated structs);
//   - only exported struct fields are visited, matching sanitizeValue, which
//     skips every field it cannot set, so validation never reports a string
//     that sanitizing could not repair;
//   - []byte is skipped because it is raw data, not text;
//   - map values are checked, map keys are not.
func validateValue(val reflect.Value, path string) error {
	if !val.IsValid() {
		return nil
	}

	switch val.Kind() {
	case reflect.Ptr, reflect.Interface:
		if val.IsNil() {
			return nil
		}
		return validateValue(val.Elem(), path)

	case reflect.Struct:
		typ := val.Type()
		for i := 0; i < val.NumField(); i++ {
			sf := typ.Field(i)
			// PkgPath is non-empty only for unexported fields.
			if sf.PkgPath != "" {
				continue
			}
			fieldPath := fmt.Sprintf("%s.%s", path, sf.Name)
			if err := validateValue(val.Field(i), fieldPath); err != nil {
				return err
			}
		}

	case reflect.String:
		s := val.String()
		if !utf8.ValidString(s) {
			return fmt.Errorf("invalid UTF-8 string at %s: %q", path, s)
		}

	case reflect.Slice:
		if val.Type().Elem().Kind() == reflect.Uint8 {
			return nil // skip []byte
		}
		for i := 0; i < val.Len(); i++ {
			if err := validateValue(val.Index(i), fmt.Sprintf("%s[%d]", path, i)); err != nil {
				return err
			}
		}

	case reflect.Map:
		for _, key := range val.MapKeys() {
			if err := validateValue(val.MapIndex(key), fmt.Sprintf("%s[%v]", path, key)); err != nil {
				return err
			}
		}
	}
	return nil
}
// SanitizeProtoUTF8 rewrites, in place, the strings reachable from msg that
// are not valid UTF-8. It reflects over msg and delegates to sanitizeValue
// with an empty starting path, returning whatever error that walk produces.
func SanitizeProtoUTF8(msg proto.Message) error {
	root := reflect.ValueOf(msg)
	return sanitizeValue(root, "")
}
// sanitizeValue walks val recursively and replaces, in place, every string
// that is not valid UTF-8 with the result of latin1ToUTF8. Strings that are
// already valid are left untouched.
//
// path is a human-readable location prefix used only in error messages;
// struct fields append ".Name", slice elements "[i]" and map values "[key]".
//
// Traversal rules:
//   - invalid values and nil pointers/interfaces are ignored;
//   - pointers are followed;
//   - interface values are sanitized through a settable copy of the value
//     they hold, which is stored back when the interface itself is settable
//     (interface-typed fields are presumably how generated code exposes
//     oneof members - confirm against the generated structs);
//   - struct fields that cannot be set (unexported, or reached through a
//     non-addressable struct) are skipped;
//   - []byte is skipped because it is raw data, not text;
//   - map values are sanitized and written back, map keys are not touched.
//
// It returns an error when the conversion fails or when an invalid string is
// found in a value that reflection cannot modify.
func sanitizeValue(val reflect.Value, path string) error {
	if !val.IsValid() {
		return nil
	}

	switch val.Kind() {
	case reflect.Ptr:
		if val.IsNil() {
			return nil
		}
		return sanitizeValue(val.Elem(), path)

	case reflect.Interface:
		if val.IsNil() {
			return nil
		}
		// The value held by an interface is never settable, so work on a
		// settable copy. If it is a pointer the pointee is fixed in place;
		// otherwise the repaired copy has to be stored back.
		held := val.Elem()
		scratch := reflect.New(held.Type()).Elem()
		scratch.Set(held)
		if err := sanitizeValue(scratch, path); err != nil {
			return err
		}
		if val.CanSet() {
			val.Set(scratch)
		}

	case reflect.Struct:
		typ := val.Type()
		for i := 0; i < val.NumField(); i++ {
			field := val.Field(i)
			if !field.CanSet() {
				continue
			}
			if err := sanitizeValue(field, fmt.Sprintf("%s.%s", path, typ.Field(i).Name)); err != nil {
				return err
			}
		}

	case reflect.String:
		s := val.String()
		if utf8.ValidString(s) {
			return nil
		}
		// SetString panics on a non-settable value; report it instead.
		if !val.CanSet() {
			return fmt.Errorf("cannot sanitize invalid UTF-8 string at %s: value is not settable", path)
		}
		utf8Str, err := latin1ToUTF8(s)
		if err != nil {
			return fmt.Errorf("failed to convert %s to UTF-8: %w", path, err)
		}
		val.SetString(utf8Str)

	case reflect.Slice:
		if val.Type().Elem().Kind() == reflect.Uint8 {
			return nil // skip []byte
		}
		for i := 0; i < val.Len(); i++ {
			if err := sanitizeValue(val.Index(i), fmt.Sprintf("%s[%d]", path, i)); err != nil {
				return err
			}
		}

	case reflect.Map:
		for _, key := range val.MapKeys() {
			// Map values are not addressable: sanitize a settable copy and
			// store it back under the same key.
			valItem := val.MapIndex(key)
			newItem := reflect.New(valItem.Type()).Elem()
			newItem.Set(valItem)
			if err := sanitizeValue(newItem, fmt.Sprintf("%s[%v]", path, key)); err != nil {
				return err
			}
			val.SetMapIndex(key, newItem)
		}
	}
	return nil
}
// latin1ToUTF8 treats the bytes of input as ISO-8859-1 text and returns the
// same text encoded as UTF-8. The whole input is run through the decoder, not
// just the bytes that made it invalid. On a read error it returns the empty
// string together with that error.
func latin1ToUTF8(input string) (string, error) {
	decoder := charmap.ISO8859_1.NewDecoder()
	source := bytes.NewReader([]byte(input))

	decoded, err := io.ReadAll(decoder.Reader(source))
	if err != nil {
		return "", err
	}
	return string(decoded), nil
}

View File

@ -10,6 +10,7 @@ import "google/protobuf/any.proto"; // Import 'Any'
message Bug { message Bug {
string uuid = 1; string uuid = 1;
string name = 2; string name = 2;
string subject = 3;
} }
message Bugs { // `autogenpb:marshal` message Bugs { // `autogenpb:marshal`