Skip to content

Instantly share code, notes, and snippets.

@fpawel
Created January 14, 2025 10:12
Show Gist options
  • Select an option

  • Save fpawel/b00f23b2e240cba351d8405e6d25226f to your computer and use it in GitHub Desktop.

Select an option

Save fpawel/b00f23b2e240cba351d8405e6d25226f to your computer and use it in GitHub Desktop.
package reflect
import (
"fmt"
"reflect"
"strconv"
"strings"
"unicode/utf8"
)
// RemoveInvalidUTF8 удалить не валидные UTF-8 руны из всех строк, вложенных в динамический объект.
// Можно использовать для валидации объекта перед вызовом proto.Marshall https://pkg.go.dev/github.com/golang/protobuf/proto#Marshal
// для избежания ошибки string field contains invalid UTF-8
func RemoveInvalidUTF8(in interface{}) {
removeInvalidUTF8Runes(indirect(in))
}
func CheckInvalidUTF8Runes(in any) any {
return checkInvalidUTF8Runes(indirect(in))
}
// indirect wraps v in a reflect.Value and, when v is a pointer, returns the
// value it points to. A nil interface or a nil pointer yields the zero
// reflect.Value.
func indirect(v interface{}) reflect.Value {
	val := reflect.ValueOf(v)
	return reflect.Indirect(val)
}
// removeInvalidUTF8Runes recursively walks v and rewrites, in place, every
// string that is not valid UTF-8, dropping the invalid byte sequences.
//
// Traversal rules:
//   - string: rewritten only when it is settable; a string reached without
//     going through a pointer, slice or map (for example a struct passed by
//     value) cannot be modified and is left untouched instead of panicking;
//   - struct: exported fields only;
//   - pointer: the pointee (nil pointers are ignored);
//   - interface: the dynamic value (nil interfaces are ignored);
//   - slice/array: every element;
//   - map: every value (keys are not modified);
//   - anything else is ignored.
//
// Cyclic data structures are not detected and lead to unbounded recursion.
func removeInvalidUTF8Runes(v reflect.Value) {
	switch v.Kind() {
	case reflect.String:
		// SetString panics on non-settable values, hence the CanSet guard.
		if s := v.String(); v.CanSet() && !utf8.ValidString(s) {
			v.SetString(strings.ToValidUTF8(s, ""))
		}
	case reflect.Struct:
		t := v.Type()
		for i := 0; i < v.NumField(); i++ {
			if t.Field(i).IsExported() {
				removeInvalidUTF8Runes(v.Field(i))
			}
		}
	case reflect.Ptr:
		// Elem of a nil pointer is the zero Value, which hits the no-op path.
		removeInvalidUTF8Runes(v.Elem())
	case reflect.Interface:
		elem := v.Elem()
		if !elem.IsValid() {
			return // nil interface
		}
		if !v.CanSet() {
			// Best effort: data behind pointers, slices and maps held by the
			// interface can still be cleaned even though v cannot be replaced.
			removeInvalidUTF8Runes(elem)
			return
		}
		// The value stored in an interface is not addressable, so clean an
		// addressable copy and store it back.
		cp := reflect.New(elem.Type()).Elem()
		cp.Set(elem)
		removeInvalidUTF8Runes(cp)
		v.Set(cp)
	case reflect.Slice, reflect.Array:
		for i := 0; i < v.Len(); i++ {
			removeInvalidUTF8Runes(v.Index(i))
		}
	case reflect.Map:
		for _, k := range v.MapKeys() {
			// Map values are not addressable either: clean a copy, then
			// write it back under the same key.
			mv := v.MapIndex(k)
			cp := reflect.New(mv.Type()).Elem()
			cp.Set(mv)
			removeInvalidUTF8Runes(cp)
			v.SetMapIndex(k, cp)
		}
	}
}
// checkInvalidUTF8Runes recursively walks v and reports every string that is
// not valid UTF-8, without modifying v.
//
// The result mirrors the shape of v but keeps only the offending parts:
//   - string: the value as escaped by strconv.Quote, without the surrounding
//     quotes (invalid bytes show up as \xNN);
//   - struct: map[string]any keyed by exported field name;
//   - pointer/interface: the report of the value they refer to;
//   - slice/array: []any with the reports of the offending elements only, so
//     original positions are not preserved;
//   - map: map[string]any keyed by the map key formatted with %v (keys
//     themselves are not checked).
//
// It returns nil when nothing invalid is found, including for nil pointers,
// nil interfaces and kinds that cannot contain strings.
func checkInvalidUTF8Runes(v reflect.Value) any {
	switch v.Kind() {
	case reflect.String:
		if s := v.String(); !utf8.ValidString(s) {
			quoted := strconv.Quote(s)
			return quoted[1 : len(quoted)-1]
		}
	case reflect.Struct:
		t := v.Type()
		report := make(map[string]any, v.NumField())
		for i := 0; i < v.NumField(); i++ {
			field := t.Field(i)
			if !field.IsExported() {
				continue
			}
			if x := checkInvalidUTF8Runes(v.Field(i)); x != nil {
				report[field.Name] = x
			}
		}
		if len(report) != 0 {
			return report
		}
	case reflect.Ptr, reflect.Interface:
		// Elem of a nil pointer/interface is the zero Value, which reports nil.
		return checkInvalidUTF8Runes(v.Elem())
	case reflect.Slice, reflect.Array:
		report := make([]any, 0, v.Len())
		for i := 0; i < v.Len(); i++ {
			if x := checkInvalidUTF8Runes(v.Index(i)); x != nil {
				report = append(report, x)
			}
		}
		if len(report) != 0 {
			return report
		}
	case reflect.Map:
		report := make(map[string]any, v.Len())
		for it := v.MapRange(); it.Next(); {
			if x := checkInvalidUTF8Runes(it.Value()); x != nil {
				report[fmt.Sprintf("%v", it.Key())] = x
			}
		}
		if len(report) != 0 {
			return report
		}
	}
	return nil
}
package reflect
import (
"encoding/json"
"github.com/stretchr/testify/assert"
"testing"
"github.com/stretchr/testify/require"
)
// TestReplaceInvalidUTF8 verifies that RemoveInvalidUTF8 strips invalid UTF-8
// bytes from a plain string field and from slices of a named string type
// stored as map values.
func TestReplaceInvalidUTF8(t *testing.T) {
	type (
		AA string
		BB struct {
			Str string
			Map map[string][]AA
		}
	)
	var x BB
	x.Str = "s\xc5"
	x.Map = map[string][]AA{
		"k1": {"ab\xc5"},
		"k2": {"bc\xc5"},
	}

	RemoveInvalidUTF8(&x)

	// Fail loudly on a marshalling error instead of comparing against an
	// empty payload.
	b, err := json.Marshal(x)
	require.NoError(t, err)
	require.JSONEq(t, `{"Str":"s","Map":{"k1":["ab"],"k2":["bc"]}}`, string(b))
}
// TestCheckInvalidUTF8 verifies that CheckInvalidUTF8Runes reports invalid
// strings found in a top-level field, in a nested struct and in map values,
// with the invalid bytes escaped as \xNN.
func TestCheckInvalidUTF8(t *testing.T) {
	type (
		A string
		C struct {
			CStr string
		}
		B struct {
			Str string
			C   C
			Map map[string][]A
		}
	)
	var x B
	x.C = C{CStr: "cd\xc5"}
	x.Str = "ab\xc5"
	x.Map = map[string][]A{
		"k1": {"ab\xc5"},
		"k2": {"bc\xc5"},
	}

	// Fail loudly on a marshalling error instead of comparing against an
	// empty payload.
	b, err := json.Marshal(CheckInvalidUTF8Runes(&x))
	require.NoError(t, err)
	assert.JSONEq(t, `{"C":{"CStr":"cd\\xc5"},"Map":{"k1":["ab\\xc5"],"k2":["bc\\xc5"]},"Str":"ab\\xc5"}`, string(b))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment