// 17 may 2020
#include "uipriv.h"
#include "third_party/utf.h"

// TODO write separate tests for this file?
// TODO ideally this functionality should really be part of utf itself, in some form or another (for instance, via utf8SanitizedLen() + the requisite loop)

// nGrow is the number of bytes by which uiprivSanitizeUTF8() enlarges its
// output buffer each time the next encoded rune might not fit.
// NOTE(review): the growth logic enlarges the buffer only once per rune, so
// this value has to cover at least one encoded rune (the function's scratch
// buffer is 4 bytes); keep it at 4 or more.
#define nGrow 32

// uiprivSanitizeUTF8() returns a newly allocated, NUL-terminated copy of str
// in which every rune has been run through uiprivUTF8DecodeRune() and then
// re-encoded with uiprivUTF8EncodeRune().
//
// The result can be longer than the input (presumably because the decoder
// maps malformed bytes to a replacement rune whose encoding is longer than
// the bytes it replaces - confirm against utf.h), so the output buffer is
// grown in steps of nGrow bytes as needed.
//
// str must be a non-NULL, NUL-terminated string. The returned buffer is
// obtained from uiprivAlloc()/uiprivRealloc() and is owned by the caller.
char *uiprivSanitizeUTF8(const char *str)
{
	size_t len;		// usable capacity of out, not counting the terminator slot
	char *out;
	const char *s;
	size_t i;		// number of bytes written to out so far
	uint32_t rune;
	char encoded[4];
	size_t n;

	// strlen() is safe on UTF-8 input: the only rune whose encoding contains
	// a 0x00 byte is U+0000 itself, so the first zero byte really is the end
	// of the string.
	len = strlen(str);
	// The buffer always holds len + 1 bytes: len for data plus one for the
	// terminating NUL.
	out = (char *) uiprivAlloc((len + 1) * sizeof (char), "sanitized UTF-8 string");
	s = str;
	i = 0;
	while (*s != '\0') {
		// A length of 0 is passed here; judging by this call the decoder
		// then relies on the NUL terminator to find the end of the input.
		s = uiprivUTF8DecodeRune(s, 0, &rune);
		n = uiprivUTF8EncodeRune(rune, encoded);
		// Grow only when the rune would spill into the terminator slot.
		// A rune that exactly fills the remaining len bytes still fits, so
		// already-valid input never triggers a reallocation. One step of
		// nGrow is sufficient because i <= len holds here and n is at most
		// sizeof encoded, which must not exceed nGrow.
		if ((i + n) > len) {
			out = (char *) uiprivRealloc(out, (len + 1) * sizeof (char), (len + nGrow + 1) * sizeof (char), "sanitized UTF-8 string");
			len += nGrow;
		}
		memcpy(out + i, encoded, n);
		i += n;
	}
	// Terminate explicitly instead of depending on the allocator handing
	// back zero-filled memory; i <= len, so this is inside the buffer.
	out[i] = '\0';
	return out;
}

// uiprivFreeUTF8() releases a string by handing it to uiprivFree().
// Judging by the parameter name, it is the intended way to dispose of a
// string returned by uiprivSanitizeUTF8().
void uiprivFreeUTF8(char *sanitized)
{
	uiprivFree(sanitized);
}