// Copyright 2017-2025 WIT.COM Inc. All rights reserved. // Use of this source code is governed by the GPL 3.0 package main import ( "bytes" "fmt" "io" "log" "reflect" "unicode/utf8" "go.wit.com/lib/protobuf/bugpb" "go.wit.com/lib/protobuf/forgepb" "golang.org/x/text/encoding/charmap" "google.golang.org/protobuf/proto" ) func doDebug() { me.forge = forgepb.InitPB() me.forge.ScanGoSrc() if err := me.forge.ConfigSave(); err != nil { if err := me.forge.Repos.ConfigSave(); err != nil { err := ValidateProtoUTF8(me.forge.Repos) if err != nil { log.Printf("Protobuf UTF-8 validation failed: %v\n", err) } if err := bugpb.SanitizeProtoUTF8(me.forge.Repos); err != nil { log.Fatalf("Sanitization failed: %v", err) } } // badExit(err) } me.forge.SetConfigSave(true) me.forge.Exit() okExit("this never runs") } // ValidateProtoUTF8 checks all string fields in a proto.Message recursively. func ValidateProtoUTF8(msg proto.Message) error { return validateValue(reflect.ValueOf(msg), "") } func validateValue(val reflect.Value, path string) error { if !val.IsValid() { return nil } if val.Kind() == reflect.Ptr { if val.IsNil() { return nil } return validateValue(val.Elem(), path) } switch val.Kind() { case reflect.Struct: for i := 0; i < val.NumField(); i++ { field := val.Field(i) fieldType := val.Type().Field(i) fieldPath := fmt.Sprintf("%s.%s", path, fieldType.Name) if err := validateValue(field, fieldPath); err != nil { return err } } case reflect.String: s := val.String() if !utf8.ValidString(s) { return fmt.Errorf("invalid UTF-8 string at %s: %q", path, s) } case reflect.Slice: if val.Type().Elem().Kind() == reflect.Uint8 { return nil // skip []byte } for i := 0; i < val.Len(); i++ { if err := validateValue(val.Index(i), fmt.Sprintf("%s[%d]", path, i)); err != nil { return err } } case reflect.Map: for _, key := range val.MapKeys() { valItem := val.MapIndex(key) if err := validateValue(valItem, fmt.Sprintf("%s[%v]", path, key)); err != nil { return err } } } return nil } // SanitizeProtoUTF8 fixes all invalid UTF-8 strings in a proto.Message recursively. func SanitizeProtoUTF8(msg proto.Message) error { return sanitizeValue(reflect.ValueOf(msg), "") } func sanitizeValue(val reflect.Value, path string) error { if !val.IsValid() { return nil } if val.Kind() == reflect.Ptr { if val.IsNil() { return nil } return sanitizeValue(val.Elem(), path) } switch val.Kind() { case reflect.Struct: for i := 0; i < val.NumField(); i++ { field := val.Field(i) fieldType := val.Type().Field(i) if !field.CanSet() { continue } if err := sanitizeValue(field, fmt.Sprintf("%s.%s", path, fieldType.Name)); err != nil { return err } } case reflect.String: s := val.String() if !utf8.ValidString(s) { utf8Str, err := latin1ToUTF8(s) if err != nil { return fmt.Errorf("failed to convert %s to UTF-8: %v", path, err) } val.SetString(utf8Str) } case reflect.Slice: if val.Type().Elem().Kind() == reflect.Uint8 { return nil // skip []byte } for i := 0; i < val.Len(); i++ { if err := sanitizeValue(val.Index(i), fmt.Sprintf("%s[%d]", path, i)); err != nil { return err } } case reflect.Map: for _, key := range val.MapKeys() { valItem := val.MapIndex(key) newItem := reflect.New(valItem.Type()).Elem() newItem.Set(valItem) if err := sanitizeValue(newItem, fmt.Sprintf("%s[%v]", path, key)); err != nil { return err } val.SetMapIndex(key, newItem) } } return nil } func latin1ToUTF8(input string) (string, error) { reader := charmap.ISO8859_1.NewDecoder().Reader(bytes.NewReader([]byte(input))) result, err := io.ReadAll(reader) if err != nil { return "", err } return string(result), nil }