autogenpb/protoReformat.go

586 lines
13 KiB
Go

// Copyright 2017-2025 WIT.COM Inc. All rights reserved.
// Use of this source code is governed by the GPL 3.0
package main
import (
"fmt"
"iter"
"os"
"regexp"
"strings"
sync "sync"
"go.wit.com/log"
)
// like 'goimports', but for .proto files
// allTheLines is the package-level line scanner shared by the parser:
// protoReformat assigns it, and the load() methods advance it while
// consuming the body of each declaration.
var allTheLines *LinesScanner
// EnumMessage wraps the FormatMsg that holds the lines of one enum block.
type EnumMessage struct {
	// msgPB receives the body lines and footer collected by load().
	msgPB *FormatMsg
	// all is a list of Message values.
	// NOTE(review): not referenced by the code visible here — confirm its purpose.
	all []Message
}
// StdMessage wraps the FormatMsg that holds one "message" block.
type StdMessage struct {
	// msgPB is the underlying node; name() reads its Header and load()
	// delegates to it.
	msgPB *FormatMsg
	// all is a list of Message values.
	// NOTE(review): not referenced by the code visible here — confirm its purpose.
	all []Message
}
// name returns the header line of the wrapped FormatMsg when one is attached,
// matching what (*StdMessage).name does. Without a wrapped FormatMsg it
// returns the placeholder string.
func (msg *EnumMessage) name() string {
	if msg.msgPB != nil {
		return msg.msgPB.Header
	}
	return "fuckit enum"
}
// name returns the header line of the wrapped FormatMsg, or a placeholder
// string when no FormatMsg is attached.
func (msg *StdMessage) name() string {
	if msg.msgPB == nil {
		return "fuckit std"
	}
	return msg.msgPB.Header
}
// Message is the common behaviour of a parsed .proto declaration.
type Message interface {
	// name returns a label for the declaration.
	name() string
	// load consumes the declaration's lines from the shared scanner.
	load()
	// addMsg attaches another Message.
	// NOTE(review): no implementation is visible in this part of the file.
	addMsg(Message)
}
// protoReformatComments rewrites filename in place, turning C-style
// /* ... */ comments into // comments.
//
// Each line is first passed through commentPreprocessor (comments that open
// and close on the same line), then the whole text is passed through
// commentPreprocessorFull (comments spanning several lines).
//
// It returns the error from reading or from saving the file; previously a
// failed save was silently reported as success.
func protoReformatComments(filename string) error {
	// read in the .proto file
	data, err := os.ReadFile(filename)
	if err != nil {
		log.Info("file read failed", filename, err)
		return err
	}
	log.Info("filename", filename)

	// A Builder avoids re-copying the whole text for every appended line.
	var out strings.Builder
	for line := range makeLineIter(data) {
		out.WriteString(commentPreprocessor(line))
		out.WriteByte('\n')
	}

	return saveFile(filename, commentPreprocessorFull(out.String()))
}
// protoReformat parses the .proto file at filename into a tree of FormatMsg
// nodes and writes the formatted result back to the same file.
//
// Lines before the first top-level "oneof ", "enum " or "message "
// declaration become the Notes of the root node. Each declaration becomes a
// child of the root; the lines found between two declarations become the
// Notes of the declaration that follows them. Lines after the last
// declaration are kept as the root's Footer.
//
// It returns the error from reading or from saving the file.
func protoReformat(filename string) error {
	// read in the .proto file
	data, err := os.ReadFile(filename)
	if err != nil {
		log.Info("file read failed", filename, err)
		return err
	}

	// The root node carries the file-wide column widths; newDepth copies
	// them into every child so all messages can share the same alignment.
	basemsg := new(FormatMsg)
	basemsg.MaxVarname, basemsg.MaxVartype = scanMaxFieldWidths(data)

	var pending strings.Builder // text seen since the previous declaration
	seenDecl := false           // true once the first declaration was parsed

	// write out the messages
	allTheLines = newLinesScanner(strings.Split(string(data), "\n"))
	for allTheLines.Scan() {
		line := allTheLines.NextRaw()

		var newmsg *FormatMsg
		switch {
		case strings.HasPrefix(line, "oneof "):
			newmsg = basemsg.newOneofMessage(line)
		case strings.HasPrefix(line, "enum "):
			newmsg = basemsg.newEnumMessage(line)
		case strings.HasPrefix(line, "message "):
			log.Info("got to message", line)
			newmsg = basemsg.newStdMessage(line)
		}

		if newmsg != nil {
			newmsg.Notes = strings.Split(pending.String(), "\n")
			// Each declaration owns only the text directly above it. Without
			// the reset every later declaration repeated all earlier notes.
			pending.Reset()
			newmsg.load()
			seenDecl = true
			continue
		}

		if seenDecl {
			pending.WriteString(line)
			pending.WriteByte('\n')
		} else {
			basemsg.Notes = append(basemsg.Notes, line)
		}
	}

	// Text after the last declaration has no declaration to attach to;
	// keep it as the root footer so it is not dropped from the file.
	if tail := strings.TrimRight(pending.String(), "\n"); strings.TrimSpace(tail) != "" {
		basemsg.Footer = tail
	}

	return saveFile(filename, strings.Join(basemsg.format(), "\n"))
}

// scanMaxFieldWidths returns the longest field name and the longest field
// type found inside the top-level "message" blocks of data, as measured by
// setMaxSizes.
func scanMaxFieldWidths(data []byte) (maxName, maxType int64) {
	cur := new(FormatMsg)
	inMessage := false
	for line := range makeLineIter(data) {
		switch {
		case strings.HasPrefix(line, "message "):
			inMessage = true
		case strings.HasPrefix(line, "}"):
			// end of a top-level block: measure what was collected
			inMessage = false
			setMaxSizes(cur)
			maxName = max(maxName, cur.MaxVarname)
			maxType = max(maxType, cur.MaxVartype)
			cur = new(FormatMsg)
		case inMessage:
			// Collect the body so setMaxSizes has something to measure;
			// without this the widths were always zero.
			cur.Lines = append(cur.Lines, line)
		}
	}
	return maxName, maxType
}
// saveFile replaces the content of filename with data, trimmed of leading and
// trailing whitespace and terminated by a single newline. The file is created
// with mode 0644 when it does not exist.
//
// It returns the first error from opening, writing or closing the file.
func saveFile(filename string, data string) error {
	pf, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		log.Info("file open error. permissions?", filename, err)
		return err
	}

	if _, err := fmt.Fprintln(pf, strings.TrimSpace(data)); err != nil {
		pf.Close() // best effort: the write error is the one worth reporting
		return err
	}

	// Close can report a deferred write failure, so its error is returned.
	return pf.Close()
}
// newDepth builds a FormatMsg one level deeper than fmtmsg. The new node
// copies the column widths of fmtmsg and uses header as its Header. It is
// not linked to fmtmsg; the caller does that.
func newDepth(fmtmsg *FormatMsg, header string) *FormatMsg {
	return &FormatMsg{
		Header:     header,
		Depth:      fmtmsg.Depth + 1,
		MaxVarname: fmtmsg.MaxVarname,
		MaxVartype: fmtmsg.MaxVartype,
	}
}
// newStdMessage creates a child node of type FormatMsg_MESSAGE with the given
// header, appends it to msgPB.Msgs and returns it.
func (msgPB *FormatMsg) newStdMessage(header string) *FormatMsg {
	child := newDepth(msgPB, header)
	child.Type = FormatMsg_MESSAGE

	msgPB.Msgs = append(msgPB.Msgs, child)
	return child
}
// newOneofMessage creates a child node of type FormatMsg_ONEOF with the given
// header, appends it to msgPB.Msgs and returns it.
func (msgPB *FormatMsg) newOneofMessage(header string) *FormatMsg {
	child := newDepth(msgPB, header)
	child.Type = FormatMsg_ONEOF

	msgPB.Msgs = append(msgPB.Msgs, child)
	return child
}
// newEnumMessage creates a child node of type FormatMsg_ENUM with the given
// header, appends it to msgPB.Msgs and returns it.
func (msgPB *FormatMsg) newEnumMessage(header string) *FormatMsg {
	child := newDepth(msgPB, header)
	child.Type = FormatMsg_ENUM

	msgPB.Msgs = append(msgPB.Msgs, child)
	return child
}
// proto files can be defined as trees

// load reads the message body by delegating to the load method of the
// wrapped FormatMsg.
func (msg *StdMessage) load() {
	pb := msg.msgPB
	pb.load()
}
// load consumes lines from the shared scanner allTheLines until the line that
// closes this block. Every line is whitespace-trimmed before it is inspected.
//
// A line starting with "oneof ", "enum " or "message " opens a child node,
// which is loaded recursively. A line starting with "}" is stored as the
// Footer and ends the block. Any other line is appended to Lines. If the
// scanner runs out first, load returns with the Footer left unset.
func (msg *FormatMsg) load() {
	for allTheLines.Scan() {
		text := allTheLines.Next()

		switch {
		case strings.HasPrefix(text, "oneof "):
			msg.newOneofMessage(text).load()
		case strings.HasPrefix(text, "enum "):
			msg.newEnumMessage(text).load()
		case strings.HasPrefix(text, "message "):
			// a message nested inside this one: recurse
			msg.newStdMessage(text).load()
		case strings.HasPrefix(text, "}"):
			msg.Footer = text
			return
		default:
			msg.Lines = append(msg.Lines, text)
		}
	}
}
// tokenMsgVar splits a field line such as "repeated string names = 3; // x"
// and returns vartype, varname, id, end.
//
// Everything before the first ";" is split on whitespace; reading from the
// right, the last word is the id, the next one (the "=" sign) is discarded,
// the next one is the varname, and whatever remains is joined with single
// spaces into the vartype. end is everything after the first ";".
func tokenMsgVar(line string) (string, string, string, string) {
	decl, end, _ := strings.Cut(line, ";")

	words := strings.Fields(decl)
	words, id := slicesPop(words)
	words, _ = slicesPop(words) // this is the "=" sign
	words, varname := slicesPop(words)

	return strings.Join(words, " "), varname, id, end
}
// slicesPop removes the last element of parts and returns the shortened slice
// together with that element. An empty input yields (nil, ""); a one-element
// input yields (nil, element). The returned slice shares storage with parts.
func slicesPop(parts []string) ([]string, string) {
	switch n := len(parts); n {
	case 0:
		return nil, ""
	case 1:
		return nil, parts[0]
	default:
		return parts[:n-1], parts[n-1]
	}
}
// 'for x := range' syntax using the smartly done golang 1.24 'iter'
func makeLineIter(data []byte) iter.Seq[string] {
items := strings.Split(string(data), "\n")
// log.Println("Made All() Iter.Seq[] with length", len(items))
return func(yield func(string) bool) {
for _, v := range items {
if !yield(v) {
return
}
}
}
}
// load consumes whitespace-trimmed lines from the shared scanner allTheLines
// and appends them to the Lines of the wrapped FormatMsg. The first line
// starting with "}" is stored as the Footer and ends the enum.
func (newMsg *EnumMessage) load() {
	pb := newMsg.msgPB
	for allTheLines.Scan() {
		text := allTheLines.Next()
		if !strings.HasPrefix(text, "}") {
			pb.Lines = append(pb.Lines, text)
			continue
		}
		pb.Footer = text
		return
	}
}
// setMaxSizes raises curmsg.MaxVartype and curmsg.MaxVarname to the longest
// field type and field name found in curmsg.Lines. Existing larger values
// are kept. Lengths are counted in bytes.
//
// Only field declarations are measured. Lines that are "//" comments, and
// lines with no "=" before the first ";" (blank lines, "reserved 4;", ...),
// are skipped: tokenizing them as fields produced bogus names and types that
// widened the columns.
func setMaxSizes(curmsg *FormatMsg) {
	for _, line := range curmsg.Lines {
		if strings.HasPrefix(strings.TrimSpace(line), "//") {
			// a comment may contain ";" and "=" without being a field
			continue
		}
		decl, _, found := strings.Cut(line, ";")
		if !found || !strings.Contains(decl, "=") {
			// line is blank, or a statement that is not a field
			continue
		}

		vartype, varname, _, _ := tokenMsgVar(line)
		curmsg.MaxVartype = max(curmsg.MaxVartype, int64(len(vartype)))
		curmsg.MaxVarname = max(curmsg.MaxVarname, int64(len(varname)))
	}
}
// padBase returns the indentation for header and footer lines: eight spaces
// for every level of Depth beyond the first, so Depth 0 and Depth 1 both
// yield the empty string.
func (msg *FormatMsg) padBase() string {
	levels := int(msg.Depth) - 1
	if levels <= 0 {
		return ""
	}
	// right-justifying an empty string yields exactly the requested spaces
	return fmt.Sprintf("%*s", 8*levels, "")
}
// pad returns the indentation for lines inside the message: eight spaces for
// every level of Depth.
func (msg *FormatMsg) pad() string {
	levels := int(msg.Depth)
	if levels <= 0 {
		return ""
	}
	// right-justifying an empty string yields exactly the requested spaces
	return fmt.Sprintf("%*s", 8*levels, "")
}
// formatEnum renders an enum node as output lines: the header, every body
// line indented with pad(), then the footer. Header and footer are indented
// with padBase() and carry a trailing comment showing the node depth.
func formatEnum(curmsg *FormatMsg) []string {
	outer, inner := curmsg.padBase(), curmsg.pad()

	out := []string{
		fmt.Sprintf("%s%s // enum depth=%d", outer, curmsg.Header, curmsg.Depth),
	}
	for _, body := range curmsg.Lines {
		out = append(out, inner+body)
	}
	return append(out,
		fmt.Sprintf("%s%s // enum footer depth=%d", outer, curmsg.Footer, curmsg.Depth))
}
// format renders the node as output lines. Enum nodes go through formatEnum;
// every other node type goes through formatMessage.
func (msg *FormatMsg) format() []string {
	if msg.Type == FormatMsg_ENUM {
		return formatEnum(msg)
	}
	return formatMessage(msg)
}
// formatMessage renders curmsg as output lines, in this order: its Notes, its
// header line, all child nodes, its own body lines, and its Footer.
//
// Child nodes are rendered through format() and prefixed with curmsg.pad().
// This includes oneof children, which used to be dropped from the output
// because only enum and message children were handled.
//
// Body lines are handled by kind:
//   - blank lines are kept as they are;
//   - "//" comment lines are moved to the comment column;
//   - statements with no "=" before the first ";" (for example "reserved 4;")
//     are passed through unchanged, because tokenizing them as fields
//     rewrote them into a broken "= 4;" line;
//   - everything else is treated as a field and aligned in columns sized by
//     MaxVartype and MaxVarname.
func formatMessage(curmsg *FormatMsg) []string {
	var newmsg []string

	// print the Notes
	newmsg = append(newmsg, curmsg.Notes...)

	if curmsg.Header != "" {
		line := fmt.Sprintf("%s%s // msg depth=%d", curmsg.padBase(), curmsg.Header, curmsg.Depth)
		parts := strings.Fields(line)
		if len(parts) > 3 {
			// hack to actually indent comments on the message line itself
			start := strings.Join(parts[:3], " ")
			end := strings.Join(parts[3:], " ")
			offset := int(curmsg.MaxVarname) + int(curmsg.MaxVartype) + 16 - len(start)
			line = fmt.Sprintf("%s %*s %s // depth=%d", start, offset, " ", end, curmsg.Depth)
		} else {
			line = fmt.Sprintf("%s // len(parts)=%d depth=%d", line, len(parts), curmsg.Depth)
		}
		newmsg = append(newmsg, line)
	} else if curmsg.Depth != 0 {
		newmsg = append(newmsg, "// ERROR: header was blank")
	}

	// find the max length of varname and vartype
	setMaxSizes(curmsg)

	for _, child := range curmsg.Msgs {
		for _, line := range child.format() {
			newmsg = append(newmsg, curmsg.pad()+line)
		}
	}

	typeWidth := int(curmsg.MaxVartype)
	nameWidth := int(curmsg.MaxVarname)
	commentCol := int(curmsg.MaxVartype + curmsg.MaxVarname + 21)

	for _, line := range curmsg.Lines {
		line = strings.TrimSpace(line)
		if line == "" {
			newmsg = append(newmsg, line)
			continue
		}
		if strings.HasPrefix(line, "//") {
			newmsg = append(newmsg, fmt.Sprintf("%*s %s", commentCol, " ", line))
			continue
		}
		if decl, _, _ := strings.Cut(line, ";"); !strings.Contains(decl, "=") {
			// not a field: keep the statement intact, indented like fields
			newmsg = append(newmsg, " "+line)
			continue
		}

		vartype, varname, id, end := tokenMsgVar(line)
		end = strings.TrimSpace(end)
		id = id + ";"
		newline := fmt.Sprintf(" %-*s %-*s = %-3s %s", typeWidth, vartype, nameWidth, varname, id, end)
		newmsg = append(newmsg, strings.TrimRight(newline, " "))
	}

	newmsg = append(newmsg, curmsg.Footer)
	return newmsg
}
// DEFINE THE Lines ITERATOR.

// newLinesScanner initializes a new iterator over things.
func newLinesScanner(things []string) *LinesScanner {
	return &LinesScanner{things: things}
}

// LinesScanner walks a slice of lines one at a time: call Scan to advance,
// then Next or NextRaw to read the line Scan moved to.
type LinesScanner struct {
	sync.Mutex
	things []string
	index  int // number of lines consumed so far; the current line is index-1
}

// Scan advances to the next line. It returns false, without advancing, once
// every line has been consumed. The bound check and the increment happen
// under the same lock.
func (it *LinesScanner) Scan() bool {
	it.Lock()
	defer it.Unlock()

	if it.index >= len(it.things) {
		return false
	}
	it.index++
	return true
}

// current returns the line Scan last advanced to. ok is false when there is
// no such line: Scan has not been called yet, or the scanner is empty.
func (it *LinesScanner) current() (line string, ok bool) {
	it.Lock()
	defer it.Unlock()

	if it.index < 1 || it.index > len(it.things) {
		return "", false
	}
	return it.things[it.index-1], true
}

// NextRaw returns the current line without any cleaning of the data. When
// there is no current line it reports the problem on stdout and returns ""
// instead of panicking on an out-of-range index.
func (it *LinesScanner) NextRaw() string {
	line, ok := it.current()
	if !ok {
		fmt.Println("Next() error in LinesScanner", it.index)
	}
	return line
}

// Next returns the current line with surrounding whitespace trimmed. When
// there is no current line it reports the problem on stdout and returns "".
func (it *LinesScanner) Next() string {
	line, ok := it.current()
	if !ok {
		fmt.Println("Next() error in LinesScanner", it.index)
	}
	return strings.TrimSpace(line)
}

// END DEFINE THE ITERATOR
// inlineBlockCommentRe matches one /* ... */ comment that opens and closes
// on the same line. The lazy match stops at the first closing marker, so the
// comment text itself may contain '*' characters.
var inlineBlockCommentRe = regexp.MustCompile(`/\*(.*?)\*/`)

// commentPreprocessor moves every /* ... */ comment found on line to the end
// of the line as a // comment.
//
// turns: "/* test */ reserved /* linkPreviews */ 4;"
// into:  " reserved  4; // test // linkPreviews"
//
// The whitespace around a removed comment is left in place, except for one
// trailing space. Decorative '*' characters at either end of the comment
// text (as in "/** doc */") are dropped, and empty comments are removed
// without leaving a dangling "//". Comments that do not close on the same
// line are left untouched.
func commentPreprocessor(line string) string {
	matches := inlineBlockCommentRe.FindAllStringSubmatch(line, -1)

	// Remove the block comments from the original line
	line = inlineBlockCommentRe.ReplaceAllString(line, "")
	line = strings.TrimSuffix(line, " ")

	// Append comments at the end with //
	for _, match := range matches {
		text := strings.TrimSpace(strings.Trim(match[1], "*"))
		if text == "" {
			continue
		}
		line += " // " + text
	}
	return line
}
// commentPreprocessorFull rewrites every /* ... */ block comment in full,
// including comments that span several lines, as a run of // comment lines.
//
//	/* this
//	 - thing
//	 */
//
// becomes
//
//	// this
//	// - thing
//	//
//
// On each line of the comment the opening marker and a leading '*' are
// removed. The closing marker is removed as well, including when it shares
// its line with text ("last words */"), which used to leave "*/" in the
// output.
func commentPreprocessorFull(full string) string {
	// (?s) lets '.' match newlines; the lazy match ends at the first "*/"
	re := regexp.MustCompile(`(?s)/\*(.*?)\*/`)

	return re.ReplaceAllStringFunc(full, func(s string) string {
		log.Info("FOUND:\n", s)

		lines := strings.Split(s, "\n")
		cleaned := make([]string, 0, len(lines))
		for _, line := range lines {
			trimmed := strings.TrimSpace(line)
			// The match ends at the first "*/", so the marker can only be
			// the suffix of the last line.
			trimmed = strings.TrimSpace(strings.TrimSuffix(trimmed, "*/"))

			switch {
			case strings.HasPrefix(trimmed, "/*"):
				trimmed = trimCommentPrefix(trimmed)
			case strings.HasPrefix(trimmed, "*"):
				trimmed = strings.TrimPrefix(trimmed, "*")
			}
			cleaned = append(cleaned, strings.TrimSpace("// "+trimmed))
		}

		s = strings.Join(cleaned, "\n")
		log.Info("NOW:\n", s)
		return s
	})
}
// trimCommentPrefix removes block-comment decoration from the start of line
// and returns the rest with surrounding whitespace trimmed:
//
//   - a line that is only "*/" becomes "";
//   - a leading "/*" and any further '*' characters are removed;
//   - otherwise a single leading '*' is removed;
//   - any other line is returned trimmed but otherwise unchanged.
//
// The "*/" case is tested first. It used to come after the leading '*' case,
// so it was never reached and "*/" came back as "/".
func trimCommentPrefix(line string) string {
	trimmed := strings.TrimSpace(line)

	if trimmed == "*/" {
		return ""
	}
	if rest, ok := strings.CutPrefix(trimmed, "/*"); ok {
		return strings.TrimSpace(strings.TrimLeft(rest, "*"))
	}
	if rest, ok := strings.CutPrefix(trimmed, "*"); ok {
		return strings.TrimSpace(rest)
	}
	return trimmed
}