Skip to content

Instantly share code, notes, and snippets.

@skeeto
Last active December 31, 2025 21:42
Show Gist options
  • Select an option

  • Save skeeto/ae08899356acc08f88c23c97239c78b1 to your computer and use it in GitHub Desktop.

Select an option

Save skeeto/ae08899356acc08f88c23c97239c78b1 to your computer and use it in GitHub Desktop.
JSON example parser
// $ cc -o parser parser.c
// $ ./parser <schema.json
// Ref: https://old.reddit.com/r/C_Programming/comments/1q0c2j7
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
// Element count of a true array (not a pointer), as a signed value.
// The whole expansion is parenthesized so it composes safely inside
// larger expressions.
#define lenof(a) ((ptrdiff_t)(sizeof(a) / sizeof(*(a))))
// Wrap a string literal (or char array) in a Str; the length excludes the
// terminating NUL.
#define S(s) ((Str){(s), lenof(s) - 1})
// Allocate n zero-initialized objects of type t from arena a.
#define new(a, n, t) ((t *)alloc((a), (n), sizeof(t), _Alignof(t)))
// Convert a signed size to size_t. A negative value trips the assertion
// (when assertions are enabled) instead of silently wrapping around.
static size_t tousize(ptrdiff_t x)
{
    assert(!(x < 0));
    size_t converted = (size_t)x;
    return converted;
}
// Linear allocation region covering [beg, end). Initialized positionally
// in main(), so the field order matters.
typedef struct {
char *beg; // next free byte; advanced by alloc() and load()
char *end; // one past the last byte of the region
} Arena;
// Carve `count` objects of `size` bytes each, aligned to `align`, off the
// front of the arena and return them zero-filled.
//
// align is used as a bit mask and therefore must be a power of two.
// Running out of space is handled by assertion (the file's OOM policy).
// The capacity check uses division so count*size cannot overflow, and it
// accepts an allocation that exactly fills the remaining space.
static char *alloc(Arena *a, ptrdiff_t count, int size, int align)
{
    // Bytes needed to round a->beg up to the next multiple of align.
    int pad = (int)-(uintptr_t)a->beg & (align - 1);
    ptrdiff_t avail = a->end - a->beg - pad;

    assert(count >= 0 && size > 0);
    // avail may be negative when the padding alone does not fit; integer
    // division truncates toward zero, so that case is rejected explicitly.
    assert(avail >= 0 && count <= avail/size);  // OOM policy

    ptrdiff_t total = count * size;
    char *r = a->beg + pad;
    a->beg = r + total;
    return memset(r, 0, tousize(total));
}
// Counted string: a pointer plus a length. Not NUL-terminated in general;
// tokens produced by next() are slices of the input buffer.
typedef struct {
char *data; // first byte of the string
ptrdiff_t len; // number of bytes
} Str;
// Return true when a and b have the same length and the same bytes.
static bool equals(Str a, Str b)
{
    if (a.len != b.len) {
        return false;
    }
    if (a.len == 0) {
        // Two empty strings are equal. Returning here avoids handing
        // memcmp the null data pointer of a zero-initialized Str.
        return true;
    }
    return memcmp(a.data, b.data, tousize(a.len)) == 0;
}
// Return the sub-string s[beg, end). The bounds are checked by assertion:
// 0 <= beg <= end <= s.len.
static Str slice(Str s, ptrdiff_t beg, ptrdiff_t end)
{
    assert(0 <= beg);
    assert(beg <= end);
    assert(end <= s.len);
    Str sub = {s.data + beg, end - beg};
    return sub;
}
// 64-bit multiplicative hash over the bytes of s: each byte is XORed into
// the state, which is then multiplied by a fixed odd constant.
static uint64_t hash(Str s)
{
    uint64_t h = 0x100;
    ptrdiff_t i = 0;
    while (i < s.len) {
        // Mask to 0..255 so the result does not depend on char signedness.
        uint64_t byte = (uint64_t)(s.data[i] & 0xff);
        h = (h ^ byte) * 1111111111111111111;
        i++;
    }
    return h;
}
// Convert a decimal token with an optional leading '+' or '-' to int64_t.
// An empty string yields 0. The bytes after the sign are not validated
// here, and arithmetic is done in uint64_t so overflow wraps.
static int64_t parse_integer(Str s)
{
    if (s.len == 0) {
        return 0;
    }

    int negative = 0;
    ptrdiff_t pos = 0;
    char first = s.data[0];
    if (first == '-') {
        negative = 1;
        pos = 1;
    } else if (first == '+') {
        pos = 1;
    }

    uint64_t magnitude = 0;
    while (pos < s.len) {
        magnitude = magnitude*10 + (uint64_t)(s.data[pos] - '0');
        pos++;
    }

    // Negate in unsigned arithmetic (two's complement wrap).
    uint64_t bits = negative ? (uint64_t)0 - magnitude : magnitude;
    return (int64_t)bits;
}
// Object keys the parser recognizes, one constant per key name.
// K_null (value 0) is what parse_keyword() returns for any other string.
typedef enum {
K_null, K_alternates, K_codec, K_created_at, K_down, K_ext, K_file,
K_fps, K_has, K_height, K_id, K_md5, K_original, K_posts, K_preview,
K_sample, K_samples, K_score, K_size, K_total, K_up, K_updated_at,
K_url, K_variants, K_width,
} Keyword;
// Map a key string to its Keyword constant using a linear scan over a
// static name table. Returns K_null when the string is not a known key.
static Keyword parse_keyword(Str s)
{
    // Each entry: enum value, name length, name text.
    #define KW(n) {K_##n, lenof(#n)-1, #n}
    static struct {
        int16_t value;
        int8_t  len;
        char    name[13];
    } table[] = {
        KW(alternates), KW(codec), KW(created_at), KW(down), KW(ext),
        KW(file), KW(fps), KW(has), KW(height), KW(id), KW(md5),
        KW(original), KW(posts), KW(preview), KW(sample), KW(samples),
        KW(score), KW(size), KW(total), KW(up), KW(updated_at), KW(url),
        KW(variants), KW(width),
    };
    #undef KW

    for (int i = 0; i != lenof(table); i++) {
        Str candidate = {table[i].name, table[i].len};
        if (!equals(s, candidate)) {
            continue;
        }
        return (Keyword)table[i].value;
    }
    return K_null;
}
// Token kinds produced by next(). T_open is returned for both '{' and '[',
// T_close for both '}' and ']'. T_error is 0, so a zero-initialized Token
// is an error token.
typedef enum {
T_error, T_eof, T_integer, T_true, T_false, T_string, T_open, T_close
} Type;
// One lexical token. next() fills `value` only for T_string (the text
// between the quotes) and T_integer (the digits with optional sign);
// for every other type it is left zero-initialized.
typedef struct {
Type type;
Str value;
} Token;
// Input buffer paired with a read offset.
// NOTE(review): no function visible in this file uses this type; the
// parse_* functions pass a ptrdiff_t cursor and a Str separately.
typedef struct {
Str input;
ptrdiff_t off;
} Parser;
// True for bytes the tokenizer skips between tokens: anything at or below
// the space character, plus ':' and ',' (separators are not validated).
static bool whitespace(char c)
{
    if (c == ':' || c == ',') {
        return true;
    }
    return c <= ' ';
}
// True when c is an ASCII decimal digit.
static bool digit(char c)
{
    return !(c < '0' || c > '9');
}
// Read the next token from s starting at *pcursor and advance *pcursor
// past it.
//
// ':' and ',' are skipped like whitespace, so separators are not checked.
// '{' and '[' both produce T_open; '}' and ']' both produce T_close.
// String tokens carry the raw bytes between the quotes; escape sequences
// are not decoded, but a backslash protects the byte after it so that an
// escaped quote (\") does not end the string. Number tokens are an
// optional sign followed by decimal digits only. Anything else (including
// `null` and fractional numbers) yields T_error with *pcursor left at the
// offending byte.
static Token next(ptrdiff_t *pcursor, Str s)
{
    ptrdiff_t cursor = *pcursor;
    while (cursor < s.len && whitespace(s.data[cursor])) {
        cursor++;
    }
    if (cursor >= s.len) {
        *pcursor = cursor;
        return (Token){T_eof};
    }

    ptrdiff_t end;
    switch (s.data[cursor]) {
    case '"':
        end = ++cursor;
        while (end < s.len && s.data[end] != '"') {
            // Step over an escaped byte together with its backslash.
            end += (s.data[end] == '\\') ? 2 : 1;
        }
        if (end >= s.len) {
            // Unterminated string (or a trailing backslash).
            *pcursor = cursor;
            return (Token){T_error};
        }
        *pcursor = end + 1;
        return (Token){T_string, slice(s, cursor, end)};

    case 't':
        if (s.len-cursor < 4 || !equals(S("true"), slice(s, cursor, cursor+4))) {
            *pcursor = cursor;
            return (Token){T_error};
        }
        *pcursor = cursor + 4;
        return (Token){T_true};

    case 'f':
        if (s.len-cursor < 5 || !equals(S("false"), slice(s, cursor, cursor+5))) {
            *pcursor = cursor;
            return (Token){T_error};
        }
        *pcursor = cursor + 5;
        return (Token){T_false};

    // Digits are listed individually: `case '0' ... '9'` is a compiler
    // extension, not standard C.
    case '-': case '+':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        end = cursor + 1;
        while (end < s.len && digit(s.data[end])) {
            end++;
        }
        *pcursor = end;
        return (Token){T_integer, slice(s, cursor, end)};

    case '{':
    case '[':
        *pcursor = cursor + 1;
        return (Token){T_open};

    case '}':
    case ']':
        *pcursor = cursor + 1;
        return (Token){T_close};

    default:
        *pcursor = cursor;
        return (Token){T_error};
    }
}
// Members of a post's "file" object, filled in by parse_file().
// Str members are slices of the input buffer, not copies.
typedef struct {
int64_t width;
int64_t height;
Str ext;
int64_t size;
Str md5;
Str url;
} File;
// Members of a post's "preview" object, filled in by parse_preview().
typedef struct {
int64_t width;
int64_t height;
Str url;
} Preview;
// Members of the "original" object inside "alternates", filled in by
// parse_original(). Carries the same data members as Spec.
typedef struct {
int64_t fps;
Str codec;
int64_t size;
int64_t width;
int64_t height;
Str url;
} Original;
// One named entry of a "variants" or "samples" object. Each node is linked
// two ways: through child[] for lookup by key (new_spec/find_spec) and
// through next in the order parse_alternates() appended it.
typedef struct Spec Spec;
struct Spec {
Spec *child[2]; // lookup links, selected by successive bits of hash(_key)
Spec *next; // next entry in append order, or null
Str _key; // entry name, e.g. "mp4" or "720p"
int64_t fps;
Str codec;
int64_t size;
int64_t width;
int64_t height;
Str url;
};
// Allocate a zeroed Spec for `key` and hang it on the first empty slot
// found by walking the tree at *m, choosing child[0] or child[1] from the
// top bit of the (progressively shifted) key hash. Existing entries with
// the same key are not detected. Returns the new node.
static Spec *new_spec(Spec **m, Str key, Arena *a)
{
    uint64_t bits = hash(key);
    while (*m) {
        m = &(*m)->child[bits >> 63];
        bits <<= 1;
    }

    Spec *node = new(a, 1, Spec);
    node->_key = key;
    *m = node;
    return node;
}
// Look up `key` in the tree rooted at m, following the same hash-bit path
// new_spec() uses. Returns the matching node, or null when absent.
static Spec *find_spec(Spec *m, Str key)
{
    uint64_t bits = hash(key);
    while (m && !equals(key, m->_key)) {
        m = m->child[bits >> 63];
        bits <<= 1;
    }
    return m;
}
// Members of the "alternates" object, filled in by parse_alternates().
typedef struct {
bool has;
Original original;
Spec *variants; // entries of "variants"; null when absent or empty
Spec *samples; // entries of "samples"; null when absent or empty
} Alternates;
// Members of a post's "sample" object, filled in by parse_sample().
typedef struct {
bool has;
int64_t height;
int64_t width;
Str url;
Alternates alternates;
} Sample;
// Members of a post's "score" object, filled in by parse_score().
typedef struct {
int64_t up;
int64_t down;
int64_t total;
} Score;
// One element of the top-level "posts" array, filled in by parse_post().
// Posts form a singly linked list in input order.
typedef struct Post Post;
struct Post {
Post *next; // following post, or null for the last one
int64_t id;
Str created_at; // raw timestamp text; could be parsed further
Str updated_at; // raw timestamp text, same treatment as created_at
File file;
Preview preview;
Sample sample;
Score score;
};
// Consume exactly one value at *c: a single scalar token, or a whole
// bracketed value including everything nested inside it. Returns false on
// a tokenizer error or when the input ends first.
static bool skip(ptrdiff_t *c, Str json)
{
    ptrdiff_t depth = 0;
    for (;;) {
        Type t = next(c, json).type;
        if (t == T_error || t == T_eof) {
            return false;
        }
        if (t == T_open) {
            depth++;
        } else if (t == T_close) {
            depth--;
        }
        // Back at the starting nesting level: the value is complete.
        if (depth == 0) {
            return true;
        }
    }
}
// Parse the members of a "file" object into *d; the opening bracket has
// already been consumed by the caller. Unknown keys are skipped. Returns
// true at the closing bracket, false on any malformed or mistyped input.
static bool parse_file(File *d, ptrdiff_t *c, Str json)
{
    for (;;) {
        Token key = next(c, json);
        if (key.type == T_close) {
            return true;
        }
        if (key.type != T_string) {
            return false;
        }

        // Pick the destination first; exactly one pointer ends up set.
        int64_t *inum = 0;
        Str     *istr = 0;
        switch (parse_keyword(key.value)) {
        case K_width:  inum = &d->width;  break;
        case K_height: inum = &d->height; break;
        case K_size:   inum = &d->size;   break;
        case K_ext:    istr = &d->ext;    break;
        case K_md5:    istr = &d->md5;    break;
        case K_url:    istr = &d->url;    break;
        default:
            if (!skip(c, json)) {
                return false;
            }
            continue;
        }

        Token val = next(c, json);
        if (inum) {
            if (val.type != T_integer) {
                return false;
            }
            *inum = parse_integer(val.value);
        } else {
            if (val.type != T_string) {
                return false;
            }
            *istr = val.value;
        }
    }
}
// Parse the members of an "original" object into *d; the opening bracket
// has already been consumed by the caller. Unknown keys are skipped.
// Returns true at the closing bracket, false on malformed or mistyped
// input.
static bool parse_original(Original *d, ptrdiff_t *c, Str json)
{
    for (;;) {
        Token key = next(c, json);
        if (key.type == T_close) {
            return true;
        }
        if (key.type != T_string) {
            return false;
        }

        // Pick the destination first; exactly one pointer ends up set.
        int64_t *inum = 0;
        Str     *istr = 0;
        switch (parse_keyword(key.value)) {
        case K_fps:    inum = &d->fps;    break;
        case K_size:   inum = &d->size;   break;
        case K_width:  inum = &d->width;  break;
        case K_height: inum = &d->height; break;
        case K_codec:  istr = &d->codec;  break;
        case K_url:    istr = &d->url;    break;
        default:
            if (!skip(c, json)) {
                return false;
            }
            continue;
        }

        Token val = next(c, json);
        if (inum) {
            if (val.type != T_integer) {
                return false;
            }
            *inum = parse_integer(val.value);
        } else {
            if (val.type != T_string) {
                return false;
            }
            *istr = val.value;
        }
    }
}
// Parse the members of one variants/samples entry into *d; the opening
// bracket has already been consumed by the caller. Unknown keys are
// skipped. Returns true at the closing bracket, false on malformed or
// mistyped input.
static bool parse_spec(Spec *d, ptrdiff_t *c, Str json)
{
    for (;;) {
        Token key = next(c, json);
        if (key.type == T_close) {
            return true;
        }
        if (key.type != T_string) {
            return false;
        }

        // Pick the destination first; exactly one pointer ends up set.
        int64_t *inum = 0;
        Str     *istr = 0;
        switch (parse_keyword(key.value)) {
        case K_fps:    inum = &d->fps;    break;
        case K_size:   inum = &d->size;   break;
        case K_width:  inum = &d->width;  break;
        case K_height: inum = &d->height; break;
        case K_codec:  istr = &d->codec;  break;
        case K_url:    istr = &d->url;    break;
        default:
            if (!skip(c, json)) {
                return false;
            }
            continue;
        }

        Token val = next(c, json);
        if (inum) {
            if (val.type != T_integer) {
                return false;
            }
            *inum = parse_integer(val.value);
        } else {
            if (val.type != T_string) {
                return false;
            }
            *istr = val.value;
        }
    }
}
static bool parse_alternates(Alternates *d, ptrdiff_t *c, Str json, Arena *a)
{
for (;;) {
Token token = next(c, json);
switch (token.type) {
default:
return false;
case T_close:
return true;
case T_string:
switch (parse_keyword(token.value)) {
default:
if (!skip(c, json)) return false;
break;
case K_has:
token = next(c, json);
if (token.type == T_true) d->has = true;
else if (token.type == T_false) d->has = false;
else return false;
break;
case K_original:
token = next(c, json);
if (token.type != T_open) return false;
if (!parse_original(&d->original, c, json)) return false;
break;
case K_variants: {
token = next(c, json);
if (token.type != T_open) return false;
Spec *head = 0;
Spec **tail = &head;
for (;;) {
Token keytok = next(c, json);
if (keytok.type == T_close) break;
if (keytok.type != T_string) return false;
Str key = keytok.value;
token = next(c, json);
if (token.type != T_open) return false;
Spec *spec = new_spec(&head, key, a);
*tail = spec;
tail = &spec->next;
if (!parse_spec(spec, c, json)) return false;
}
d->variants = head;
} break;
case K_samples: {
token = next(c, json);
if (token.type != T_open) return false;
Spec *head = 0;
Spec **tail = &head;
for (;;) {
Token keytok = next(c, json);
if (keytok.type == T_close) break;
if (keytok.type != T_string) return false;
Str key = keytok.value;
token = next(c, json);
if (token.type != T_open) return false;
Spec *spec = new_spec(&head, key, a);
*tail = spec;
tail = &spec->next;
if (!parse_spec(spec, c, json)) return false;
}
d->samples = head;
} break;
}
}
}
}
// Parse the members of a "preview" object into *d; the opening bracket has
// already been consumed by the caller. Unknown keys are skipped. Returns
// true at the closing bracket, false on malformed or mistyped input.
static bool parse_preview(Preview *d, ptrdiff_t *c, Str json)
{
    for (;;) {
        Token key = next(c, json);
        if (key.type == T_close) {
            return true;
        }
        if (key.type != T_string) {
            return false;
        }

        // A null inum after the switch means the key was "url".
        int64_t *inum = 0;
        switch (parse_keyword(key.value)) {
        case K_width:  inum = &d->width;  break;
        case K_height: inum = &d->height; break;
        case K_url:    break;
        default:
            if (!skip(c, json)) {
                return false;
            }
            continue;
        }

        Token val = next(c, json);
        if (inum) {
            if (val.type != T_integer) {
                return false;
            }
            *inum = parse_integer(val.value);
        } else {
            if (val.type != T_string) {
                return false;
            }
            d->url = val.value;
        }
    }
}
// Parse the members of a "sample" object into *d; the opening bracket has
// already been consumed by the caller. Unknown keys are skipped. Nested
// "alternates" data is allocated from arena a. Returns true at the closing
// bracket, false on malformed or mistyped input.
static bool parse_sample(Sample *d, ptrdiff_t *c, Str json, Arena *a)
{
    for (;;) {
        Token key = next(c, json);
        if (key.type == T_close) {
            return true;
        }
        if (key.type != T_string) {
            return false;
        }

        Keyword kw = parse_keyword(key.value);
        int64_t *inum = 0;
        switch (kw) {
        case K_height: inum = &d->height; break;
        case K_width:  inum = &d->width;  break;
        case K_has:
        case K_url:
        case K_alternates:
            break;
        default:
            if (!skip(c, json)) {
                return false;
            }
            continue;
        }

        Token val = next(c, json);
        if (inum) {
            if (val.type != T_integer) {
                return false;
            }
            *inum = parse_integer(val.value);
        } else if (kw == K_has) {
            if (val.type != T_true && val.type != T_false) {
                return false;
            }
            d->has = (val.type == T_true);
        } else if (kw == K_url) {
            if (val.type != T_string) {
                return false;
            }
            d->url = val.value;
        } else {
            // K_alternates: a nested object.
            if (val.type != T_open) {
                return false;
            }
            if (!parse_alternates(&d->alternates, c, json, a)) {
                return false;
            }
        }
    }
}
// Parse the members of a "score" object into *d; the opening bracket has
// already been consumed by the caller. Unknown keys are skipped. Returns
// true at the closing bracket, false on malformed or mistyped input.
static bool parse_score(Score *d, ptrdiff_t *c, Str json)
{
    for (;;) {
        Token key = next(c, json);
        if (key.type == T_close) {
            return true;
        }
        if (key.type != T_string) {
            return false;
        }

        // Every recognized member is an integer.
        int64_t *inum = 0;
        switch (parse_keyword(key.value)) {
        case K_up:    inum = &d->up;    break;
        case K_down:  inum = &d->down;  break;
        case K_total: inum = &d->total; break;
        default:
            if (!skip(c, json)) {
                return false;
            }
            continue;
        }

        Token val = next(c, json);
        if (val.type != T_integer) {
            return false;
        }
        *inum = parse_integer(val.value);
    }
}
// Parse the members of one post object into *d; the opening bracket has
// already been consumed by the caller. Unknown keys are skipped. Nested
// data that needs allocation comes from arena a. Returns true at the
// closing bracket, false on malformed or mistyped input.
static bool parse_post(Post *d, ptrdiff_t *c, Str json, Arena *a)
{
    for (;;) {
        Token key = next(c, json);
        if (key.type == T_close) {
            return true;
        }
        if (key.type != T_string) {
            return false;
        }

        Keyword kw = parse_keyword(key.value);
        Str *istr = 0;
        switch (kw) {
        case K_created_at: istr = &d->created_at; break;
        case K_updated_at: istr = &d->updated_at; break;
        case K_id:
        case K_file:
        case K_preview:
        case K_sample:
        case K_score:
            break;
        default:
            if (!skip(c, json)) {
                return false;
            }
            continue;
        }

        Token val = next(c, json);
        if (istr) {
            if (val.type != T_string) {
                return false;
            }
            *istr = val.value;
            continue;
        }
        if (kw == K_id) {
            if (val.type != T_integer) {
                return false;
            }
            d->id = parse_integer(val.value);
            continue;
        }

        // The remaining keys all hold nested objects.
        if (val.type != T_open) {
            return false;
        }
        bool ok;
        switch (kw) {
        case K_file:    ok = parse_file(&d->file, c, json);       break;
        case K_preview: ok = parse_preview(&d->preview, c, json); break;
        case K_sample:  ok = parse_sample(&d->sample, c, json, a); break;
        default:        ok = parse_score(&d->score, c, json);     break;
        }
        if (!ok) {
            return false;
        }
    }
}
// Parse a document of the form {"posts": [ {...}, {...}, ... ]} and return
// the posts as a linked list in input order, allocated from arena a.
// Returns null on malformed input; note that an empty "posts" array also
// yields null, so the two cases are indistinguishable to the caller.
// Parsing stops at the bracket that closes the array; nothing after it is
// examined.
static Post *parse(Str json, Arena *a)
{
    ptrdiff_t c = 0;
    if (next(&c, json).type != T_open) {
        return 0;
    }
    Token key = next(&c, json);
    if (key.type != T_string || parse_keyword(key.value) != K_posts) {
        return 0;
    }
    if (next(&c, json).type != T_open) {
        return 0;
    }

    Post *head = 0;
    Post **tail = &head;
    for (;;) {
        Token token = next(&c, json);
        switch (token.type) {
        case T_open: {
            Post *post = new(a, 1, Post);
            if (!parse_post(post, &c, json, a)) {
                return 0;
            }
            *tail = post;
            tail = &post->next;
            // Keep looping for further posts; without this break control
            // would fall into T_close and return after the first post.
            break;
        }
        case T_close:
            return head;
        default:
            return 0;
        }
    }
}
// Look up an entry of the post's sample.alternates "variants" by name.
// Returns null when the post has no such entry.
static Spec *get_variant(Post *post, Str key)
{
    Spec *root = post->sample.alternates.variants;
    return find_spec(root, key);
}
// Look up an entry of the post's sample.alternates "samples" by name.
// Returns null when the post has no such entry.
static Spec *get_sample(Post *post, Str key)
{
    Spec *root = post->sample.alternates.samples;
    return find_spec(root, key);
}
// Write s to stdout wrapped in double quotes. The bytes are emitted as
// stored; no escaping is applied.
static void print_str(Str s)
{
    int precision = (int)s.len;
    printf("\"%.*s\"", precision, s.data);
}
// Write v to stdout in decimal.
static void print_i64(int64_t v)
{
    long long wide = (long long)v;
    printf("%lld", wide);
}
// Write "true" or "false" to stdout. fputs is used so that the text is
// never interpreted as a printf format string.
static void print_bool(_Bool b)
{
    fputs(b ? "true" : "false", stdout);
}
// Write *f to stdout as a JSON object.
static void print_file(const File *f)
{
    fputs("{\"width\":", stdout);
    print_i64(f->width);
    fputs(",\"height\":", stdout);
    print_i64(f->height);
    fputs(",\"ext\":", stdout);
    print_str(f->ext);
    fputs(",\"size\":", stdout);
    print_i64(f->size);
    fputs(",\"md5\":", stdout);
    print_str(f->md5);
    fputs(",\"url\":", stdout);
    print_str(f->url);
    fputs("}", stdout);
}
// Write *p to stdout as a JSON object.
static void print_preview(const Preview *p)
{
    fputs("{\"width\":", stdout);
    print_i64(p->width);
    fputs(",\"height\":", stdout);
    print_i64(p->height);
    fputs(",\"url\":", stdout);
    print_str(p->url);
    fputs("}", stdout);
}
// Write *o to stdout as a JSON object.
static void print_original(const Original *o)
{
    fputs("{\"fps\":", stdout);
    print_i64(o->fps);
    fputs(",\"codec\":", stdout);
    print_str(o->codec);
    fputs(",\"size\":", stdout);
    print_i64(o->size);
    fputs(",\"width\":", stdout);
    print_i64(o->width);
    fputs(",\"height\":", stdout);
    print_i64(o->height);
    fputs(",\"url\":", stdout);
    print_str(o->url);
    fputs("}", stdout);
}
// Write the data members of *s to stdout as a JSON object (the entry's
// key is not included).
static void print_spec(const Spec *s)
{
    fputs("{\"fps\":", stdout);
    print_i64(s->fps);
    fputs(",\"codec\":", stdout);
    print_str(s->codec);
    fputs(",\"size\":", stdout);
    print_i64(s->size);
    fputs(",\"width\":", stdout);
    print_i64(s->width);
    fputs(",\"height\":", stdout);
    print_i64(s->height);
    fputs(",\"url\":", stdout);
    print_str(s->url);
    fputs("}", stdout);
}
// Write a chain of Spec entries to stdout as one JSON object, following
// the `next` links so entries appear in the order they were appended.
static void print_spec_map(const Spec *head)
{
    fputs("{", stdout);
    for (const Spec *node = head; node; node = node->next) {
        print_str(node->_key);
        fputs(":", stdout);
        print_spec(node);
        if (node->next) {
            fputs(",", stdout);
        }
    }
    fputs("}", stdout);
}
// Write *a to stdout as a JSON object, including its nested objects.
static void print_alternates(const Alternates *a)
{
    fputs("{\"has\":", stdout);
    print_bool(a->has);
    fputs(",\"original\":", stdout);
    print_original(&a->original);
    fputs(",\"variants\":", stdout);
    print_spec_map(a->variants);
    fputs(",\"samples\":", stdout);
    print_spec_map(a->samples);
    fputs("}", stdout);
}
// Write *s to stdout as a JSON object, including its nested alternates.
static void print_sample(const Sample *s)
{
    fputs("{\"has\":", stdout);
    print_bool(s->has);
    fputs(",\"height\":", stdout);
    print_i64(s->height);
    fputs(",\"width\":", stdout);
    print_i64(s->width);
    fputs(",\"url\":", stdout);
    print_str(s->url);
    fputs(",\"alternates\":", stdout);
    print_alternates(&s->alternates);
    fputs("}", stdout);
}
// Write *sc to stdout as a JSON object.
static void print_score(const Score *sc)
{
    fputs("{\"up\":", stdout);
    print_i64(sc->up);
    fputs(",\"down\":", stdout);
    print_i64(sc->down);
    fputs(",\"total\":", stdout);
    print_i64(sc->total);
    fputs("}", stdout);
}
// Write one post to stdout as a JSON object, including nested objects.
static void print_post(const Post *p)
{
    fputs("{\"id\":", stdout);
    print_i64(p->id);
    fputs(",\"created_at\":", stdout);
    print_str(p->created_at);
    fputs(",\"updated_at\":", stdout);
    print_str(p->updated_at);
    fputs(",\"file\":", stdout);
    print_file(&p->file);
    fputs(",\"preview\":", stdout);
    print_preview(&p->preview);
    fputs(",\"sample\":", stdout);
    print_sample(&p->sample);
    fputs(",\"score\":", stdout);
    print_score(&p->score);
    fputs("}", stdout);
}
// Write the whole post list to stdout as {"posts":[...]} followed by a
// newline. A null list prints an empty array.
static void print_posts(Post *posts)
{
    fputs("{\"posts\":[", stdout);
    for (Post *node = posts; node; node = node->next) {
        print_post(node);
        if (node->next) {
            fputs(",", stdout);
        }
    }
    fputs("]}\n", stdout);
}
// Read stdin into the free space of the arena (at most as many bytes as
// remain) and return the bytes read as a Str. The arena's free pointer is
// advanced past the data so later allocations do not overwrite it.
static Str load(Arena *a)
{
    size_t capacity = tousize(a->end - a->beg);
    size_t got = fread(a->beg, 1, capacity, stdin);
    Str r = {a->beg, (ptrdiff_t)got};
    a->beg += r.len;
    return r;
}
// Read a JSON document from stdin, parse it, and print two things:
//   1. an object holding the "720p" sample and "mp4" variant of each post
//      that has them, and
//   2. the full parsed post list re-serialized as JSON.
// All memory comes from one static 2 MiB arena shared by the input text
// and the parsed structures.
int main(void)
{
    static char mem[1<<21];
    Arena a = {mem, mem + lenof(mem)};
    Str json = load(&a);
    Post *posts = parse(json, &a);

    ptrdiff_t count = 0;  // members written so far, for comma placement
    fputs("{", stdout);
    for (Post *p = posts; p; p = p->next) {
        // Query the post being visited (p), not the head of the list.
        Spec *p720 = get_sample(p, S("720p"));
        if (p720) {
            if (count++) {
                fputs(",", stdout);
            }
            fputs("\"720p\":", stdout);
            print_spec(p720);
        }
        Spec *mp4 = get_variant(p, S("mp4"));
        if (mp4) {
            if (count++) {
                fputs(",", stdout);
            }
            fputs("\"mp4\":", stdout);
            print_spec(mp4);
        }
    }
    fputs("}\n", stdout);

    print_posts(posts);
    return 0;
}
{
"posts": [
{
"id": 0,
"created_at": "2025-12-31T12:30:51.312Z",
"updated_at": "2025-12-31T12:30:51.312Z",
"file": {
"width": 0,
"height": 0,
"ext": "string",
"size": 0,
"md5": "string",
"url": "string"
},
"preview": {
"width": 0,
"height": 0,
"url": "string"
},
"sample": {
"has": true,
"height": 0,
"width": 0,
"url": "string",
"alternates": {
"has": true,
"original": {
"fps": 0,
"codec": "string",
"size": 0,
"width": 0,
"height": 0,
"url": "string"
},
"variants": {
"webm": {
"fps": 0,
"codec": "string",
"size": 0,
"width": 0,
"height": 0,
"url": "string"
},
"mp4": {
"fps": 0,
"codec": "string",
"size": 0,
"width": 0,
"height": 0,
"url": "string"
}
},
"samples": {
"480p": {
"fps": 0,
"codec": "string",
"size": 0,
"width": 0,
"height": 0,
"url": "string"
},
"720p": {
"fps": 0,
"codec": "string",
"size": 0,
"width": 0,
"height": 0,
"url": "string"
}
}
}
},
"score": {
"up": 0,
"down": 0,
"total": 0
}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment