autogenpb/protoReformat.go

// Copyright 2017-2025 WIT.COM Inc. All rights reserved.
// Use of this source code is governed by the GPL 3.0

package main

import (
	"fmt"
	"iter"
	"os"
	"regexp"
	"strings"
	sync "sync"

	"go.wit.com/log"
)

// like 'goimports', but for .proto files

// allTheLines is the package-wide line cursor shared by protoReformat and the
// load() methods. protoReformat assigns it before parsing, and every load()
// call advances it, so a nested definition continues reading from the line
// where its caller stopped.
var allTheLines *LinesScanner

// EnumMessage pairs a FormatMsg with a list of Message values. Its load()
// method fills msgPB.Lines and msgPB.Footer from the shared line scanner.
type EnumMessage struct {
	// msgPB receives the body lines and the closing line read by load().
	msgPB *FormatMsg
	// all is not referenced in this part of the file — TODO confirm purpose.
	all   []Message
}

// StdMessage pairs a FormatMsg with a list of Message values. name() reports
// msgPB.Header when msgPB is set, and load() delegates to msgPB.load().
type StdMessage struct {
	// msgPB is the underlying message; name() tolerates it being nil.
	msgPB *FormatMsg
	// all is not referenced in this part of the file — TODO confirm purpose.
	all   []Message
}

// name returns a fixed placeholder string; it does not consult msg.msgPB.
// NOTE(review): (*StdMessage).name returns msgPB.Header when available —
// confirm whether this method was meant to do the same.
func (msg *EnumMessage) name() string {
	return "fuckit enum"
}

// name returns the header line of the wrapped FormatMsg, or a fixed
// placeholder string when no FormatMsg has been attached yet.
func (msg *StdMessage) name() string {
	if msg.msgPB == nil {
		return "fuckit std"
	}
	return msg.msgPB.Header
}

// Message is the behavior expected from a parsed block of a .proto file.
// NOTE(review): EnumMessage and StdMessage define name() and load() in this
// part of the file, but no addMsg is visible here — confirm they satisfy
// the interface.
type Message interface {
	// name returns a label for the block.
	name() string
	// load reads the block's body.
	load()
	// addMsg attaches another Message to this one (presumably as a child —
	// TODO confirm, no implementation is visible here).
	addMsg(Message)
}

// protoReformatComments rewrites the .proto file at filename in place,
// converting block comments into line comments.
//
// Each line is first passed through commentPreprocessor, which handles block
// comments that open and close on the same line. The joined result is then
// passed through commentPreprocessorFull, which handles the remaining block
// comments. The result is written back with saveFile.
//
// It returns the error from reading or from writing the file, if any.
func protoReformatComments(filename string) error {
	// read in the .proto file
	data, err := os.ReadFile(filename)
	if err != nil {
		log.Info("file read failed", filename, err)
		return err
	}

	log.Info("filename", filename)

	// A Builder avoids re-copying the whole output for every appended line.
	var out strings.Builder
	out.Grow(len(data))
	for line := range makeLineIter(data) {
		out.WriteString(commentPreprocessor(line))
		out.WriteByte('\n')
	}

	// Propagate write failures instead of reporting success unconditionally.
	return saveFile(filename, commentPreprocessorFull(out.String()))
}

// protoReformat rewrites the .proto file at filename in place with its
// top-level message, enum and oneof blocks re-emitted by FormatMsg.format.
//
// Lines before the first block are kept as the notes of the root FormatMsg.
// Lines found between two blocks become the notes of the block that follows
// them. Lines after the last block are appended to the end of the output.
//
// It returns the error from reading or from writing the file, if any.
func protoReformat(filename string) error {
	// read in the .proto file
	data, err := os.ReadFile(filename)
	if err != nil {
		log.Info("file read failed", filename, err)
		return err
	}

	// First pass: meant to find the widest vartype and varname of any message.
	var bigName, bigType int64
	fmtmsg := new(FormatMsg)
	inMessage := false
	for line := range makeLineIter(data) {
		switch {
		case strings.HasPrefix(line, "message "):
			inMessage = true
		case strings.HasPrefix(line, "}"):
			// end of the message
			inMessage = false
			setMaxSizes(fmtmsg)
			bigName = max(bigName, fmtmsg.MaxVarname)
			bigType = max(bigType, fmtmsg.MaxVartype)
			fmtmsg = new(FormatMsg)
		case inMessage:
			// NOTE(review): field lines are never appended to fmtmsg.Lines,
			// so setMaxSizes sees no lines and bigName/bigType stay 0. The
			// widths are recomputed per message in formatMessage. Collecting
			// the lines here would align every message to the file-wide
			// maximum — confirm that is wanted before changing it.
		}
	}

	basemsg := new(FormatMsg)
	basemsg.MaxVarname = bigName
	basemsg.MaxVartype = bigType

	// pending holds the lines seen since the previous block ended.
	var pending strings.Builder
	sawBlock := false

	// Second pass: load every top-level block into basemsg.
	allTheLines = newLinesScanner(strings.Split(string(data), "\n"))
	for allTheLines.Scan() {
		line := allTheLines.NextRaw()

		var newmsg *FormatMsg
		switch {
		case strings.HasPrefix(line, "oneof "):
			newmsg = basemsg.newOneofMessage(line)
		case strings.HasPrefix(line, "enum "):
			newmsg = basemsg.newEnumMessage(line)
		case strings.HasPrefix(line, "message "):
			log.Info("got to message", line)
			newmsg = basemsg.newStdMessage(line)
		}

		if newmsg != nil {
			newmsg.Notes = strings.Split(pending.String(), "\n")
			// Start over, otherwise every later block would repeat the
			// notes of all the blocks before it.
			pending.Reset()
			newmsg.load()
			sawBlock = true
			continue
		}

		if sawBlock {
			pending.WriteString(line)
			pending.WriteByte('\n')
		} else {
			basemsg.Notes = append(basemsg.Notes, line)
		}
	}

	var out strings.Builder
	out.Grow(len(data))
	for _, newline := range basemsg.format() {
		out.WriteString(newline)
		out.WriteByte('\n')
	}
	// Lines after the last block belong to no block; keep them rather than
	// dropping them. saveFile trims surrounding whitespace.
	out.WriteString(pending.String())

	return saveFile(filename, out.String())
}

// saveFile writes data to filename, creating the file if needed and
// truncating any previous content. Leading and trailing whitespace is trimmed
// from data and a single newline is written after it.
//
// It returns the first error from opening, writing or closing the file.
func saveFile(filename string, data string) error {
	pf, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		log.Info("file open error. permissions?", filename, err)
		return err
	}

	if _, err := fmt.Fprintln(pf, strings.TrimSpace(data)); err != nil {
		pf.Close() // the write error is the one worth reporting
		log.Info("file write error", filename, err)
		return err
	}

	// Close can report a deferred write failure, so its error matters too.
	if err := pf.Close(); err != nil {
		log.Info("file close error", filename, err)
		return err
	}
	return nil
}

// newDepth returns a new FormatMsg one level deeper than fmtmsg. The new
// message carries the given header and starts with fmtmsg's current
// MaxVarname and MaxVartype. It is not attached to fmtmsg.
func newDepth(fmtmsg *FormatMsg, header string) *FormatMsg {
	return &FormatMsg{
		MaxVarname: fmtmsg.MaxVarname,
		MaxVartype: fmtmsg.MaxVartype,
		Header:     header,
		Depth:      fmtmsg.Depth + 1,
	}
}

// newStdMessage creates a child of type FormatMsg_MESSAGE with the given
// header line, appends it to msgPB.Msgs and returns it.
func (msgPB *FormatMsg) newStdMessage(header string) *FormatMsg {
	child := newDepth(msgPB, header)
	child.Type = FormatMsg_MESSAGE

	msgPB.Msgs = append(msgPB.Msgs, child)
	return child
}

// newOneofMessage creates a child of type FormatMsg_ONEOF with the given
// header line, appends it to msgPB.Msgs and returns it.
func (msgPB *FormatMsg) newOneofMessage(header string) *FormatMsg {
	child := newDepth(msgPB, header)
	child.Type = FormatMsg_ONEOF

	msgPB.Msgs = append(msgPB.Msgs, child)
	return child
}

// newEnumMessage creates a child of type FormatMsg_ENUM with the given
// header line, appends it to msgPB.Msgs and returns it.
func (msgPB *FormatMsg) newEnumMessage(header string) *FormatMsg {
	child := newDepth(msgPB, header)
	child.Type = FormatMsg_ENUM

	msgPB.Msgs = append(msgPB.Msgs, child)
	return child
}

// proto files can be defined as trees
//
// load reads the body of this message by delegating to the load method of
// the wrapped FormatMsg, which consumes lines from the shared scanner.
func (msg *StdMessage) load() {
	msg.msgPB.load()
}

// load reads whitespace-trimmed lines from the shared scanner allTheLines
// into msg until a line starting with "}" is found or the input runs out.
//
// A line starting with "oneof ", "enum " or "message " opens a child block,
// which is created on msg and loaded recursively. The closing "}" line is
// stored in msg.Footer. Every other line is appended to msg.Lines.
func (msg *FormatMsg) load() {
	for allTheLines.Scan() {
		text := allTheLines.Next()

		var child *FormatMsg
		switch {
		case strings.HasPrefix(text, "oneof "):
			child = msg.newOneofMessage(text)
		case strings.HasPrefix(text, "enum "):
			child = msg.newEnumMessage(text)
		case strings.HasPrefix(text, "message "):
			// message inception. search for the architect. don't forget your totem
			child = msg.newStdMessage(text)
		case strings.HasPrefix(text, "}"):
			msg.Footer = text
			return
		default:
			msg.Lines = append(msg.Lines, text)
			continue
		}

		// the child consumes lines up to and including its own "}"
		child.load()
	}
}

// tokenMsgVar splits a field line into vartype, varname, id and end.
//
// Everything after the first ";" is returned unchanged as end. The part
// before it is split on whitespace and read from the right: the last word is
// the id, the word before it is discarded (the "=" sign), the next one is the
// varname, and whatever words remain are joined with single spaces to form
// the vartype. Missing words come back as empty strings.
func tokenMsgVar(line string) (string, string, string, string) {
	front, end, _ := strings.Cut(line, ";")

	words := strings.Fields(front)
	words, id := slicesPop(words)
	words, _ = slicesPop(words) // this is the "=" sign
	words, varname := slicesPop(words)

	return strings.Join(words, " "), varname, id, end
}

// slicesPop removes the last element of parts. It returns the remaining
// elements and the removed element. An empty input yields (nil, ""), and a
// single-element input yields nil as the remainder.
func slicesPop(parts []string) ([]string, string) {
	switch n := len(parts); n {
	case 0:
		return nil, ""
	case 1:
		return nil, parts[0]
	default:
		return parts[:n-1], parts[n-1]
	}
}

// 'for x := range' syntax using the smartly done golang 1.24 'iter'
func makeLineIter(data []byte) iter.Seq[string] {
	items := strings.Split(string(data), "\n")
	// log.Println("Made All() Iter.Seq[] with length", len(items))
	return func(yield func(string) bool) {
		for _, v := range items {
			if !yield(v) {
				return
			}
		}
	}
}

// load reads whitespace-trimmed lines from the shared scanner allTheLines
// into newMsg.msgPB. Lines are appended to Lines until one starts with "}";
// that line is stored in Footer and reading stops. Nested definitions are
// not recognized here.
func (newMsg *EnumMessage) load() {
	pb := newMsg.msgPB
	for allTheLines.Scan() {
		text := allTheLines.Next()
		if !strings.HasPrefix(text, "}") {
			pb.Lines = append(pb.Lines, text)
			continue
		}
		pb.Footer = text
		return
	}
}

// setMaxSizes raises curmsg.MaxVartype and curmsg.MaxVarname to the longest
// vartype and varname found in curmsg.Lines. Existing values are never
// lowered. Lines without a ";" (blank lines, comments) are skipped.
func setMaxSizes(curmsg *FormatMsg) {
	for _, line := range curmsg.Lines {
		if !strings.Contains(line, ";") {
			// line is blank or just a comment
			continue
		}

		vartype, varname, _, _ := tokenMsgVar(line)
		curmsg.MaxVartype = max(curmsg.MaxVartype, int64(len(vartype)))
		curmsg.MaxVarname = max(curmsg.MaxVarname, int64(len(varname)))
	}
}

// padBase returns the indentation for header and footer lines: eight spaces
// for every level of depth beyond the first, and none at depth 1 or below.
func (msg *FormatMsg) padBase() string {
	levels := int(msg.Depth) - 1
	if levels <= 0 {
		return ""
	}
	return strings.Repeat("        ", levels)
}

// pad returns the indentation for lines inside the message: eight spaces per
// level of depth, and none at depth 0 or below.
func (msg *FormatMsg) pad() string {
	levels := int(msg.Depth)
	if levels <= 0 {
		return ""
	}
	return strings.Repeat("        ", levels)
}

// formatEnum renders an enum block as output lines: the header indented with
// padBase, every body line indented with pad, then the footer indented with
// padBase. The header and footer each get a trailing comment recording the
// depth.
func formatEnum(curmsg *FormatMsg) []string {
	out := make([]string, 0, len(curmsg.Lines)+2)

	out = append(out, fmt.Sprintf("%s%s // enum depth=%d", curmsg.padBase(), curmsg.Header, curmsg.Depth))

	indent := curmsg.pad()
	for _, body := range curmsg.Lines {
		out = append(out, indent+body)
	}

	return append(out, fmt.Sprintf("%s%s // enum footer depth=%d", curmsg.padBase(), curmsg.Footer, curmsg.Depth))
}

// format renders msg as output lines. Blocks of type FormatMsg_ENUM go
// through formatEnum; every other type goes through formatMessage.
func (msg *FormatMsg) format() []string {
	if msg.Type == FormatMsg_ENUM {
		return formatEnum(msg)
	}
	return formatMessage(msg)
}

// formatMessage renders curmsg as output lines, in this order:
//
//  1. curmsg.Notes, unchanged;
//  2. the header line with a trailing depth comment (when the header is
//     blank, an error comment is emitted instead, except at depth 0);
//  3. every child block in curmsg.Msgs, rendered with format() and indented
//     with curmsg.pad();
//  4. curmsg.Lines: blank lines are kept, "//" comment lines are pushed right
//     past the field columns, and all other lines are split with tokenMsgVar
//     and laid out in columns sized by MaxVartype and MaxVarname;
//  5. curmsg.Footer.
//
// As a side effect, setMaxSizes is applied to curmsg after the header has
// been rendered, so the header offset uses the widths curmsg had on entry.
func formatMessage(curmsg *FormatMsg) []string {
	var newmsg []string

	// print the Notes
	newmsg = append(newmsg, curmsg.Notes...)

	switch {
	case curmsg.Header != "":
		line := fmt.Sprintf("%s%s // msg depth=%d", curmsg.padBase(), curmsg.Header, curmsg.Depth)
		parts := strings.Fields(line)
		if len(parts) > 3 {
			// hack to actually indent comments on the message line itself. you're welcome
			start := strings.Join(parts[:3], " ")
			end := strings.Join(parts[3:], " ")
			offset := int(curmsg.MaxVarname) + int(curmsg.MaxVartype) + 16 - len(start)
			line = fmt.Sprintf("%s     %*s %s // depth=%d", start, offset, " ", end, curmsg.Depth)
		} else {
			line = fmt.Sprintf("%s // len(parts)=%d depth=%d", line, len(parts), curmsg.Depth)
		}
		newmsg = append(newmsg, line)
	case curmsg.Depth != 0:
		newmsg = append(newmsg, "// ERROR: header was blank")
	}

	// find the max length of varname and vartype
	setMaxSizes(curmsg)

	// Render every child through format(), so that oneof blocks are emitted
	// too instead of being silently dropped from the output.
	indent := curmsg.pad()
	for _, child := range curmsg.Msgs {
		for _, line := range child.format() {
			newmsg = append(newmsg, indent+line)
		}
	}

	typeWidth := int(curmsg.MaxVartype)
	nameWidth := int(curmsg.MaxVarname)
	commentWidth := int(curmsg.MaxVartype + curmsg.MaxVarname + 21)

	for _, line := range curmsg.Lines {
		line = strings.TrimSpace(line)
		switch {
		case line == "":
			newmsg = append(newmsg, line)
		case strings.HasPrefix(line, "//"):
			// todo: compute 50
			newmsg = append(newmsg, fmt.Sprintf("%*s %s", commentWidth, " ", line))
		default:
			vartype, varname, id, end := tokenMsgVar(line)
			newline := fmt.Sprintf("        %-*s   %-*s     = %-3s %s",
				typeWidth, vartype, nameWidth, varname, id+";", strings.TrimSpace(end))
			newmsg = append(newmsg, strings.TrimRight(newline, " "))
		}
	}

	newmsg = append(newmsg, curmsg.Footer)
	return newmsg
}

// DEFINE THE Lines ITERATOR.

// LinesScanner is a forward-only cursor over a fixed slice of lines. Call
// Scan to advance, then Next or NextRaw to read the line Scan moved to.
type LinesScanner struct {
	sync.Mutex

	things []string
	index  int // number of lines consumed so far; the current line is index-1
}

// newLinesScanner initializes a new scanner positioned before the first
// element of things.
func newLinesScanner(things []string) *LinesScanner {
	return &LinesScanner{things: things}
}

// Scan advances to the next line. It returns false, without moving, once
// every line has been consumed.
func (it *LinesScanner) Scan() bool {
	// Check and increment under the same lock so the two cannot interleave.
	it.Lock()
	defer it.Unlock()

	if it.index >= len(it.things) {
		return false
	}
	it.index++
	return true
}

// current returns the line most recently reached by Scan. When there is no
// such line (Scan has not succeeded yet) it reports the problem on stdout
// and returns "" instead of indexing out of range.
func (it *LinesScanner) current() string {
	pos := it.index - 1
	if pos < 0 || pos >= len(it.things) {
		fmt.Println("Next() error in LinesScanner", it.index)
		return ""
	}
	return it.things[pos]
}

// NextRaw returns the current line exactly as stored.
// does no cleaning of the data
func (it *LinesScanner) NextRaw() string {
	return it.current()
}

// Next returns the current line with leading and trailing whitespace
// trimmed.
func (it *LinesScanner) Next() string {
	return strings.TrimSpace(it.current())
}

// END DEFINE THE ITERATOR

// inlineBlockCommentRe matches a /* ... */ comment that opens and closes on
// the same line and has no '*' in its text.
var inlineBlockCommentRe = regexp.MustCompile(`/\*([^*]+)\*/`)

// commentPreprocessor moves every inline block comment of line to the end of
// the line as a "//" comment.
//
// turns: "/* test */ reserved /* linkPreviews */ 4;"
// into:  " reserved  4; // test // linkPreviews"
//
// The comment text is trimmed. The whitespace that surrounded a removed
// comment is left in place, except that one trailing space is dropped from
// the line before the comments are appended. Block comments whose text
// contains a '*', and empty ones, are left untouched.
func commentPreprocessor(line string) string {
	// Extract just the comment texts
	var comments []string
	for _, match := range inlineBlockCommentRe.FindAllStringSubmatch(line, -1) {
		comments = append(comments, strings.TrimSpace(match[1]))
	}

	// Remove the block comments from the original line
	line = inlineBlockCommentRe.ReplaceAllString(line, "")
	line = strings.TrimSuffix(line, " ")

	// Append comments at the end with //
	for _, comment := range comments {
		line += " // " + comment
	}

	return line
}

// fullBlockCommentRe matches a /* ... */ comment, which may span several
// lines, up to the first closing */.
var fullBlockCommentRe = regexp.MustCompile(`(?s)/\*(.*?)\*/`)

// commentPreprocessorFull replaces every block comment in full with "//"
// line comments, one per line of the original comment.
//
//	/* this
//	 * thing
//	 */
//
// becomes
//
//	// this
//	// thing
//	//
//
// On each line the surrounding whitespace, the opening "/*" (with any extra
// '*'), one leading '*' and the closing "*/" are removed before "// " is
// put in front. A line left empty becomes a bare "//".
func commentPreprocessorFull(full string) string {
	return fullBlockCommentRe.ReplaceAllStringFunc(full, func(s string) string {
		log.Info("FOUND:\n", s)
		lines := strings.Split(s, "\n")
		cleaned := make([]string, 0, len(lines))
		last := len(lines) - 1

		for i, line := range lines {
			trimmed := strings.TrimSpace(line)

			// The match always ends with the closing "*/". Strip it wherever
			// it sits on the last line, so "text */" does not leave a
			// dangling "*/" inside the generated line comment.
			if i == last {
				trimmed = strings.TrimSpace(strings.TrimSuffix(trimmed, "*/"))
			}

			switch {
			case strings.HasPrefix(trimmed, "/*"):
				trimmed = trimCommentPrefix(trimmed)
			case strings.HasPrefix(trimmed, "*"):
				trimmed = strings.TrimPrefix(trimmed, "*")
			}

			cleaned = append(cleaned, strings.TrimSpace("// "+trimmed))
		}

		s = strings.Join(cleaned, "\n")
		log.Info("NOW:\n", s)
		return s
	})
}

// trimCommentPrefix strips block-comment decoration from the start of line
// and returns what is left, trimmed of surrounding whitespace:
//
//   - a line that is only the closer "*/" yields "";
//   - an opener "/" followed by one or more "*" is removed;
//   - otherwise a single leading "*" is removed;
//   - any other line is returned trimmed but otherwise unchanged.
func trimCommentPrefix(line string) string {
	trimmed := strings.TrimSpace(line)

	// Must be checked before the "*" prefix rule, which would otherwise
	// turn the closer into "/".
	if trimmed == "*/" {
		return ""
	}

	if strings.HasPrefix(trimmed, "/") {
		rest := strings.TrimLeft(trimmed[1:], "*")
		// only an opener if at least one '*' followed the '/'
		if len(rest) < len(trimmed)-1 {
			return strings.TrimSpace(rest)
		}
	}

	if strings.HasPrefix(trimmed, "*") {
		return strings.TrimSpace(trimmed[1:])
	}

	return trimmed
}