Created
January 14, 2025 10:12
-
-
Save fpawel/b00f23b2e240cba351d8405e6d25226f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package reflect | |
| import ( | |
| "fmt" | |
| "reflect" | |
| "strconv" | |
| "strings" | |
| "unicode/utf8" | |
| ) | |
| // RemoveInvalidUTF8 удалить не валидные UTF-8 руны из всех строк, вложенных в динамический объект. | |
| // Можно использовать для валидации объекта перед вызовом proto.Marshall https://pkg.go.dev/github.com/golang/protobuf/proto#Marshal | |
| // для избежания ошибки string field contains invalid UTF-8 | |
| func RemoveInvalidUTF8(in interface{}) { | |
| removeInvalidUTF8Runes(indirect(in)) | |
| } | |
| func CheckInvalidUTF8Runes(in any) any { | |
| return checkInvalidUTF8Runes(indirect(in)) | |
| } | |
// indirect returns the reflect.Value of v, dereferenced once when v is a
// pointer. A nil pointer (or a nil v) yields the zero reflect.Value; any
// non-pointer value is returned as is.
func indirect(v interface{}) reflect.Value {
	rv := reflect.ValueOf(v)
	if rv.Kind() != reflect.Ptr {
		return rv
	}
	return rv.Elem()
}
// removeInvalidUTF8Runes walks v recursively and rewrites, in place, every
// string that is not valid UTF-8, dropping the invalid bytes.
//
// Traversal rules:
//   - a string is rewritten only when it is invalid and settable; a
//     non-settable string (for example one reached through a struct that was
//     passed by value) is left untouched instead of causing a panic;
//   - only exported struct fields are visited;
//   - pointers and interfaces are followed, nil ones end the walk;
//   - slice and array elements are visited one by one;
//   - map values are cleaned through a settable copy that is stored back
//     under the same key; map keys are never modified.
//
// Every other kind, including the zero reflect.Value, is ignored. Cyclic
// structures are not detected.
func removeInvalidUTF8Runes(v reflect.Value) {
	switch v.Kind() {
	case reflect.String:
		// v.String() also works for named string types and does not need the
		// round trip through v.Interface().
		s := v.String()
		if v.CanSet() && !utf8.ValidString(s) {
			v.SetString(strings.ToValidUTF8(s, ""))
		}
	case reflect.Struct:
		t := v.Type()
		for i := 0; i < v.NumField(); i++ {
			if t.Field(i).IsExported() {
				removeInvalidUTF8Runes(v.Field(i))
			}
		}
	case reflect.Ptr:
		// Elem of a nil pointer is the zero Value, which falls through to
		// the ignored kinds.
		removeInvalidUTF8Runes(v.Elem())
	case reflect.Interface:
		elem := v.Elem()
		if !elem.IsValid() {
			return // nil interface
		}
		if !v.CanSet() {
			// The interface cannot be reassigned, but pointers, slices and
			// maps stored in it can still be cleaned through the reference.
			removeInvalidUTF8Runes(elem)
			return
		}
		// The dynamic value held by an interface is not addressable, so it
		// is cleaned through a settable copy and then stored back.
		tmp := reflect.New(elem.Type()).Elem()
		tmp.Set(elem)
		removeInvalidUTF8Runes(tmp)
		v.Set(tmp)
	case reflect.Slice, reflect.Array:
		for i := 0; i < v.Len(); i++ {
			removeInvalidUTF8Runes(v.Index(i))
		}
	case reflect.Map:
		elemType := v.Type().Elem()
		for _, k := range v.MapKeys() {
			// Map values are not addressable either: clean a copy and put
			// it back under the same key.
			tmp := reflect.New(elemType).Elem()
			tmp.Set(v.MapIndex(k))
			removeInvalidUTF8Runes(tmp)
			v.SetMapIndex(k, tmp)
		}
	}
}
// checkInvalidUTF8Runes walks v recursively and builds a report of the
// strings that are not valid UTF-8. It returns nil when nothing invalid was
// found under v.
//
// The shape of the report depends on the kind of v:
//   - string: the string escaped with strconv.Quote, without the surrounding
//     quotes (an invalid byte shows up as \xNN);
//   - struct: a map[string]any keyed by field name, holding only the exported
//     fields that produced a report;
//   - pointer, interface: the report of the value it refers to;
//   - slice, array: a []any with the reports of the offending elements only,
//     so positions in the report do not match the original indexes;
//   - map: a map[string]any keyed by the key formatted with %v, holding only
//     the entries whose value produced a report; keys are not checked.
//
// Every other kind, including the zero reflect.Value, yields nil.
func checkInvalidUTF8Runes(v reflect.Value) any {
	switch v.Kind() {
	case reflect.String:
		// v.String() also works for named string types and does not need the
		// round trip through v.Interface().
		s := v.String()
		if utf8.ValidString(s) {
			return nil
		}
		quoted := strconv.Quote(s)
		return quoted[1 : len(quoted)-1]
	case reflect.Struct:
		t := v.Type()
		found := make(map[string]any, v.NumField())
		for i := 0; i < v.NumField(); i++ {
			field := t.Field(i)
			if !field.IsExported() {
				continue
			}
			if x := checkInvalidUTF8Runes(v.Field(i)); x != nil {
				found[field.Name] = x
			}
		}
		if len(found) != 0 {
			return found
		}
	case reflect.Ptr, reflect.Interface:
		// Elem of a nil pointer or nil interface is the zero Value, which
		// reports nil.
		return checkInvalidUTF8Runes(v.Elem())
	case reflect.Slice, reflect.Array:
		found := make([]any, 0, v.Len())
		for i := 0; i < v.Len(); i++ {
			if x := checkInvalidUTF8Runes(v.Index(i)); x != nil {
				found = append(found, x)
			}
		}
		if len(found) != 0 {
			return found
		}
	case reflect.Map:
		found := make(map[string]any, v.Len())
		for iter := v.MapRange(); iter.Next(); {
			if x := checkInvalidUTF8Runes(iter.Value()); x != nil {
				found[fmt.Sprintf("%v", iter.Key())] = x
			}
		}
		if len(found) != 0 {
			return found
		}
	}
	// A literal nil keeps the returned interface comparable with nil.
	return nil
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package reflect | |
| import ( | |
| "encoding/json" | |
| "github.com/stretchr/testify/assert" | |
| "testing" | |
| "github.com/stretchr/testify/require" | |
| ) | |
| func TestReplaceInvalidUTF8(t *testing.T) { | |
| type ( | |
| AA string | |
| BB struct { | |
| Str string | |
| Map map[string][]AA | |
| } | |
| ) | |
| var x BB | |
| x.Str = "s\xc5" | |
| x.Map = map[string][]AA{ | |
| "k1": {"ab\xc5"}, | |
| "k2": {"bc\xc5"}, | |
| } | |
| RemoveInvalidUTF8(&x) | |
| b, _ := json.Marshal(x) | |
| require.JSONEq(t, `{"Str":"s","Map":{"k1":["ab"],"k2":["bc"]}}`, string(b)) | |
| } | |
| func TestCheckInvalidUTF8(t *testing.T) { | |
| type ( | |
| A string | |
| C struct { | |
| CStr string | |
| } | |
| B struct { | |
| Str string | |
| C C | |
| Map map[string][]A | |
| } | |
| ) | |
| var x B | |
| x.C = C{CStr: "cd\xc5"} | |
| x.Str = "ab\xc5" | |
| x.Map = map[string][]A{ | |
| "k1": {"ab\xc5"}, | |
| "k2": {"bc\xc5"}, | |
| } | |
| b, _ := json.Marshal(CheckInvalidUTF8Runes(&x)) | |
| assert.JSONEq(t, `{"C":{"CStr":"cd\\xc5"},"Map":{"k1":["ab\\xc5"],"k2":["bc\\xc5"]},"Str":"ab\\xc5"}`, string(b)) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment