From 2616d0d8b45f98b0946b08bb15d047753d47949d Mon Sep 17 00:00:00 2001 From: Jeff Carr Date: Sun, 29 Jun 2025 02:48:47 -0500 Subject: [PATCH] add a way to debug protobuf Marshal() errors --- argv.go | 5 ++ argvAutoshell.go | 6 +- doDebug.go | 168 +++++++++++++++++++++++++++++++++++++++++++++++ main.go | 6 ++ 4 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 doDebug.go diff --git a/argv.go b/argv.go index fd281b7..6676a6c 100644 --- a/argv.go +++ b/argv.go @@ -19,6 +19,7 @@ type args struct { Clean *CleanCmd `arg:"subcommand:clean" help:"start over at the beginning"` Commit *CommitCmd `arg:"subcommand:commit" help:"'git commit' but errors out if on wrong branch"` Config *ConfigCmd `arg:"subcommand:config" help:"show your .config/forge/ settings"` + Debug *DebugCmd `arg:"subcommand:debug" help:"debug forge"` Dirty *DirtyCmd `arg:"subcommand:dirty" help:"show repos git says are dirty"` GitFetch *FindCmd `arg:"subcommand:fetch" help:"run 'git fetch master'"` List *FindCmd `arg:"subcommand:list" help:"print a table of the current repos"` @@ -90,6 +91,10 @@ type ConfigCmd struct { Register string `arg:"--register" help:"register your git URL (foo.com/mystuff) or (github.com/foo/bar)"` }
+type DebugCmd struct { // sub-subcommands accepted after "debug" (type of the args.Debug field)
+	Config *EmptyCmd `arg:"subcommand:config" help:"used to debug protobuf Marshal() if things go wrong"`
+}
+ type CheckoutCmd struct { User *FindCmd `arg:"subcommand:user" help:"git checkout user"` Devel *FindCmd `arg:"subcommand:devel" help:"git checkout devel"` diff --git a/argvAutoshell.go b/argvAutoshell.go index e9dbd9e..c69ba70 100644 --- a/argvAutoshell.go +++ b/argvAutoshell.go @@ -34,9 +34,11 @@ func (args) doBashAuto() { case "commit": fmt.Println("--all") case "config": - fmt.Println("add fix list") + fmt.Println("add fix list debug") case "delete": deleteMatch() + case "debug": + fmt.Println("config") case "dirty": fmt.Println("") case "examine": @@ -60,7 +62,7 @@ func (args) doBashAuto() { default: if argv.BashAuto[0] == ARGNAME { 
// list the subcommands here - fmt.Println("--bash list checkout clean commit config dirty fetch patch pull sync") + fmt.Println("--bash list checkout clean commit config dirty debug fetch patch pull sync") } } os.Exit(0) diff --git a/doDebug.go b/doDebug.go new file mode 100644 index 0000000..f900332 --- /dev/null +++ b/doDebug.go @@ -0,0 +1,168 @@ +// Copyright 2017-2025 WIT.COM Inc. All rights reserved. +// Use of this source code is governed by the GPL 3.0 + +package main + +import ( + "bytes" + "fmt" + "io" + "log" + "reflect" + "unicode/utf8" + + "go.wit.com/lib/protobuf/bugpb" + "go.wit.com/lib/protobuf/forgepb" + "golang.org/x/text/encoding/charmap" + "google.golang.org/protobuf/proto" +)
+
+func doDebug() { // debug entry point: main() calls it when argv.Debug is non-nil
+	me.forge = forgepb.InitPB()
+	me.forge.ScanGoSrc()
+	if err := me.forge.ConfigSave(); err != nil { // this error only triggers the checks below; it is never reported
+		if err := me.forge.Repos.ConfigSave(); err != nil { // save Repos on its own; inspect it only if that fails too
+			err := ValidateProtoUTF8(me.forge.Repos) // shadows the save error from the line above
+			if err != nil {
+				log.Printf("Protobuf UTF-8 validation failed: %v\n", err)
+			}
+			if err := bugpb.SanitizeProtoUTF8(me.forge.Repos); err != nil { // bugpb's sanitizer, not the SanitizeProtoUTF8 defined in this file
+				log.Fatalf("Sanitization failed: %v", err)
+			}
+		}
+		// badExit(err)
+	}
+	me.forge.SetConfigSave(true)
+	me.forge.Exit() // NOTE(review): presumably does not return, given the message on the next line - confirm
+	okExit("this never runs")
+}
+
+// ValidateProtoUTF8 checks all string fields in a proto.Message recursively.
+func ValidateProtoUTF8(msg proto.Message) error {
+	return validateValue(reflect.ValueOf(msg), "")
+}
+
+// validateValue walks val recursively and returns an error naming the path of
+// the first string that is not valid UTF-8. []byte values are not inspected.
+func validateValue(val reflect.Value, path string) error {
+	if !val.IsValid() {
+		return nil
+	}
+
+	// Unwrap interfaces as well as pointers: protobuf oneof fields are
+	// interface-typed, so strings inside them would otherwise never be checked.
+	if val.Kind() == reflect.Ptr || val.Kind() == reflect.Interface {
+		if val.IsNil() {
+			return nil
+		}
+		return validateValue(val.Elem(), path)
+	}
+
+	switch val.Kind() {
+	case reflect.Struct:
+		for i := 0; i < val.NumField(); i++ {
+			sf := val.Type().Field(i)
+			if sf.PkgPath != "" {
+				continue // unexported (PkgPath is set): skipped, as sanitizeValue does via CanSet
+			}
+			if err := validateValue(val.Field(i), path+"."+sf.Name); err != nil {
+				return err
+			}
+		}
+	case reflect.String:
+		if s := val.String(); !utf8.ValidString(s) {
+			return fmt.Errorf("invalid UTF-8 string at %s: %q", path, s)
+		}
+	case reflect.Slice:
+		if val.Type().Elem().Kind() == reflect.Uint8 {
+			return nil // skip []byte
+		}
+		for i := 0; i < val.Len(); i++ {
+			if err := validateValue(val.Index(i), fmt.Sprintf("%s[%d]", path, i)); err != nil {
+				return err
+			}
+		}
+	case reflect.Map:
+		for _, key := range val.MapKeys() {
+			if err := validateValue(val.MapIndex(key), fmt.Sprintf("%s[%v]", path, key)); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+// SanitizeProtoUTF8 fixes all invalid UTF-8 strings in a proto.Message recursively.
+func SanitizeProtoUTF8(msg proto.Message) error {
+	return sanitizeValue(reflect.ValueOf(msg), "")
+}
+
+// sanitizeValue walks val recursively and rewrites in place every string that
+// is not valid UTF-8, decoding its bytes as ISO 8859-1. []byte is left alone.
+func sanitizeValue(val reflect.Value, path string) error {
+	if !val.IsValid() {
+		return nil
+	}
+
+	// Interfaces are unwrapped like pointers so oneof wrapper structs are visited too.
+	if val.Kind() == reflect.Ptr || val.Kind() == reflect.Interface {
+		if val.IsNil() {
+			return nil
+		}
+		return sanitizeValue(val.Elem(), path)
+	}
+
+	switch val.Kind() {
+	case reflect.Struct:
+		for i := 0; i < val.NumField(); i++ {
+			if field := val.Field(i); field.CanSet() {
+				if err := sanitizeValue(field, path+"."+val.Type().Field(i).Name); err != nil {
+					return err
+				}
+			}
+		}
+	case reflect.String:
+		if s := val.String(); !utf8.ValidString(s) {
+			if !val.CanSet() {
+				return fmt.Errorf("cannot rewrite invalid UTF-8 string at %s", path)
+			}
+			utf8Str, err := latin1ToUTF8(s)
+			if err != nil {
+				return fmt.Errorf("failed to convert %s to UTF-8: %w", path, err)
+			}
+			val.SetString(utf8Str)
+		}
+	case reflect.Slice:
+		if val.Type().Elem().Kind() == reflect.Uint8 {
+			return nil // skip []byte
+		}
+		for i := 0; i < val.Len(); i++ {
+			if err := sanitizeValue(val.Index(i), fmt.Sprintf("%s[%d]", path, i)); err != nil {
+				return err
+			}
+		}
+	case reflect.Map:
+		for _, key := range val.MapKeys() {
+			// Map elements are not settable: fix a settable copy, then store it back.
+			item := reflect.New(val.Type().Elem()).Elem()
+			item.Set(val.MapIndex(key))
+			if err := sanitizeValue(item, fmt.Sprintf("%s[%v]", path, key)); err != nil {
+				return err
+			}
+			val.SetMapIndex(key, item)
+		}
+	}
+
+	return nil
+}
+
+// latin1ToUTF8 decodes input as ISO 8859-1 and returns it re-encoded as UTF-8.
+func latin1ToUTF8(input string) (string, error) {
+	reader := charmap.ISO8859_1.NewDecoder().Reader(bytes.NewReader([]byte(input)))
+	result, err := io.ReadAll(reader)
+	if err != nil {
+		return "", err
+	}
+	return string(result), nil
+}
diff --git a/main.go b/main.go index 2cf2ed9..faed9c5 100644 --- a/main.go +++ b/main.go @@ -64,6 +64,12 @@ func main() { } me.urlbase = strings.Trim(me.urlbase, "/") // track down why trailing '/' makes http POST not work + // internally debugging can be triggered here before Init() + if argv.Debug != nil { + doDebug() + okExit("") + } + // load 
the ~/.config/forge/ config me.forge = forgepb.Init() me.found = new(gitpb.Repos)