Edit 2024-12-18:
Support for mouse emulation has now been implemented. See https://zmk.dev/docs/keymaps/behaviors/mouse-emulation
The instructions below are outdated.
| // example how to set up WebGPU rendering on Windows in C | |
| // uses Dawn implementation of WebGPU: https://dawn.googlesource.com/dawn/ | |
| // download pre-built Dawn webgpu.h/dll/lib files from https://github.com/mmozeiko/build-dawn/releases/latest | |
| #include "webgpu.h" | |
| #define _CRT_SECURE_NO_DEPRECATE | |
| #define WIN32_LEAN_AND_MEAN | |
| #include <windows.h> |
| # I happened to be looking at some of Cranelift's code, and I noticed that their constant-time dominates() | |
| # check was using a somewhat more ad-hoc version of a hidden gem from the data structures literature called the | |
| # parenthesis representation for trees. As far as I know, this was invented by Jacobson in his 1989 paper | |
| # Space-Efficient Static Trees and Graphs. I first learned about it from the slightly later paper by Munro and Raman | |
| # called Succinct Representations of Balanced Parentheses and Static Trees. I figured I'd give it an extremely | |
| # quick intro and then show how it leads to a (slightly better) version of Cranelift's algorithm. | |
| # | |
| # This parenthesis representation of trees is surprisingly versatile, but its most striking feature is that | |
| # it lets us query the ancestor relationship between two nodes in a tree in constant time, with a few instructions. | |
| # And the idea is extremely simple and intuitive if you just draw the right kind of picture. |
Edit 2024-12-18:
Support for mouse emulation has now been implemented. See https://zmk.dev/docs/keymaps/behaviors/mouse-emulation
The instructions below are outdated.
| partial ordering: time/events are related to each other as graphs of dependencies | |
| total ordering: time/events are related to each other as a sequence of steps | |
| atomic op: op which is observed to either happen fully or not at all (no in-between) | |
| atomic variable: memory location which atomic ops happen to | |
| data race: non-atomic ops from 2+ threads on same memory location where one op is a write | |
| load: an atomic read to observe a value | |
| store: an atomic write to publish a value | |
| read-modify-write (rmw): a read, updating the value, then a write, all atomically |
| #define WIN32_LEAN_AND_MEAN | |
| #include <winsock2.h> | |
| #include <windows.h> | |
| #define SECURITY_WIN32 | |
| #include <security.h> | |
| #include <schannel.h> | |
| #include <shlwapi.h> | |
| #include <assert.h> | |
| #include <stdio.h> |
| static inline __m128i prefix_sum_u8(__m128i x) | |
| { | |
| #if 1 | |
| // alternative form that uses shifts, not the general shuffle network on port 5 (which is a bottleneck | |
| // for us) | |
| x = _mm_add_epi8(x, _mm_slli_epi64(x, 8)); | |
| x = _mm_add_epi8(x, _mm_slli_epi64(x, 16)); | |
| x = _mm_add_epi8(x, _mm_slli_epi64(x, 32)); | |
| x = _mm_add_epi8(x, _mm_shuffle_epi8(x, _mm_setr_epi8(-1,-1,-1,-1,-1,-1,-1,-1, 7,7,7,7,7,7,7,7))); | |
| #else |
| #include <assert.h> | |
| #include <tuple> | |
| #include <vector> | |
| #include <string> | |
| typedef uint32_t Str; | |
| std::vector<const char*> strs; |
| // Lexer | |
| #define TOK_CHAR(ch1, tok1) \ | |
| case ch1: \ | |
| next_ch(); \ | |
| tok = TOK_##tok1; \ | |
| tok_prec = 0; \ | |
| break; | |
| #define TOK_EXPR(ch1, tok1, prec1, lexpr1, rexpr1) \ |
| // x64 encoding | |
| enum Reg { | |
| RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, | |
| R8, R9, R10, R11, R12, R13, R14, R15, | |
| }; | |
| enum XmmReg { | |
| XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, | |
| XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, |
| #if _WIN32 | |
| struct Timer { | |
| LARGE_INTEGER win32_freq; | |
| LARGE_INTEGER win32_start; | |
| Timer() { | |
| QueryPerformanceFrequency(&win32_freq); | |
| win32_start.QuadPart = 0; | |
| } |