Created
March 4, 2026 23:56
-
-
Save diachedelic/6e879e5414523c7808701ed94fc281ce to your computer and use it in GitHub Desktop.
Search the MaxMind binary GeoLite City IP database using only plain JavaScript.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Search the MaxMind binary GeoLite City IP database using only plain | |
| // JavaScript. | |
| // USAGE | |
| // import make_maxmind from "./maxmind.js"; | |
| // const db_bytes = new Uint8Array(...mmdb file...); | |
| // const maxmind = make_maxmind(db_bytes); | |
| // const result = maxmind.lookup("8.8.8.8"); | |
| // console.log(result.data); // {country: {...}, location: {...}, ...} | |
| /*jslint web, bitwise, global */ | |
| // File layout: | |
| // [search tree][16-byte separator][data section][metadata section] | |
| // The metadata section is located by scanning backwards for the marker | |
| // \xab\xcd\xef followed by "MaxMind.com" (14 bytes total). | |
| // The data separator is 16 zero bytes; it does not contain the marker. | |
// Number of bytes between the end of the search tree and the data section.
const data_separator_size = 16;

// Byte sequence that immediately precedes the metadata section:
// 0xAB 0xCD 0xEF followed by the ASCII text "MaxMind.com" (14 bytes).
const metadata_marker = Uint8Array.from(
    [0xAB, 0xCD, 0xEF].concat(
        Array.from("MaxMind.com", function (char) {
            return char.charCodeAt(0);
        })
    )
);

// Shared decoder for string values (UTF-8 is also the TextDecoder default).
const utf8 = new TextDecoder("utf-8");
| // Decoding. Each value starts with a control byte: | |
| // bits 7-5: type tag (0=extended, 1=pointer, 2-7=direct type) | |
| // bits 4-0: size / pointer-size info (type-specific) | |
| // Extended types: when tag == 0, next byte n gives actual type = n + 7. | |
| // Pointers: bits 4-3 = pointer size (0-3), bits 2-0 = 3 MSBits of value. | |
| // All others: bits 4-0 encode the payload byte count (0-28 directly; | |
| // 29/30/31 mean read 1/2/3 extra bytes for the actual count). | |
// Decode the MaxMind DB value whose control byte is at offset 'off'.
//
//  buf         Uint8Array holding the database bytes.
//  dv          DataView over the same bytes (used for float and double).
//  off         Absolute offset of the value's control byte.
//  data_start  Absolute offset that pointer values are added to.
//
// Returns {value, off}, where 'off' is the offset of the first byte after the
// decoded value. For a pointer, 'off' is the byte after the pointer itself,
// not after the value it points to.
//
// Throws an Error for any type tag that has no branch below (this includes
// type 10, the 128-bit unsigned integer).
function decode(buf, dv, off, data_start) {
    const ctrl = buf[off];
    off += 1;

// The top three bits are the type tag. A tag of 0 means the real type is
// stored in the next byte as (type - 7).

    let type = ctrl >> 5;
    if (type === 0) {
        type = buf[off] + 7;
        off += 1;
    }

    if (type === 1) {

// Pointer. Bits 4-3 select how many bytes follow, bits 2-0 are the most
// significant bits of the value. Each size class has a fixed bias so that the
// ranges of the classes do not overlap.

        const pointer_size = (ctrl >> 3) & 0x3;
        const msb = ctrl & 0x7;
        let pointer;
        if (pointer_size === 0) {
            pointer = (msb << 8) | buf[off];
            off += 1;
        } else if (pointer_size === 1) {
            pointer = (
                (msb << 16)
                | (buf[off] << 8)
                | buf[off + 1]
            ) + 0x800;
            off += 2;
        } else if (pointer_size === 2) {
            pointer = (
                (msb << 24)
                | (buf[off] << 16)
                | (buf[off + 1] << 8)
                | buf[off + 2]
            ) + 0x80800;
            off += 3;
        } else {

// Full four-byte pointer: the three msb bits are ignored. The '>>> 0' keeps
// values with the top bit set from turning negative.

            pointer = (
                (buf[off] << 24)
                | (buf[off + 1] << 16)
                | (buf[off + 2] << 8)
                | buf[off + 3]
            ) >>> 0;
            off += 4;
        }

// Follow the pointer, but report the offset just past the pointer bytes so
// that a surrounding map or array continues with its next entry.

        const target = decode(buf, dv, data_start + pointer, data_start);
        return {
            value: target.value,
            off
        };
    }

// Payload size: 0-28 is stored directly in the low five bits. 29, 30 and 31
// mean that one, two or three extra bytes hold the size, with a bias.

    let size = ctrl & 0x1F;
    if (size === 29) {
        size = buf[off] + 29;
        off += 1;
    } else if (size === 30) {
        size = ((buf[off] << 8) | buf[off + 1]) + 285;
        off += 2;
    } else if (size === 31) {
        size = (
            (buf[off] << 16)
            | (buf[off + 1] << 8)
            | buf[off + 2]
        ) + 65821;
        off += 3;
    }

    if (type === 2) {

// UTF-8 string of 'size' bytes.

        return {
            value: utf8.decode(buf.subarray(off, off + size)),
            off: off + size
        };
    }
    if (type === 3) {

// Double (IEEE 754, big-endian, 8 bytes).

        return {
            value: dv.getFloat64(off, false),
            off: off + 8
        };
    }
    if (type === 4) {

// Raw bytes, copied so that the result does not alias the database buffer.

        return {
            value: buf.slice(off, off + size),
            off: off + size
        };
    }
    if (type === 5 || type === 6) {

// Uint16 / Uint32: big-endian, 'size' bytes, kept within unsigned 32 bits.

        let value = 0;
        let byte_nr = 0;
        while (byte_nr < size) {
            value = (value * 256 + buf[off + byte_nr]) >>> 0;
            byte_nr += 1;
        }
        return {
            value,
            off: off + size
        };
    }
    if (type === 7) {

// Map: 'size' key/value pairs, each key and value being a nested value.

        const value = {};
        let pair_nr = 0;
        while (pair_nr < size) {
            const k = decode(buf, dv, off, data_start);
            const v = decode(buf, dv, k.off, data_start);
            off = v.off;
            value[k.value] = v.value;
            pair_nr += 1;
        }
        return {value, off};
    }
    if (type === 8) {

// Int32: big-endian two's complement. A field shorter than four bytes is
// zero-padded on the left, so it is always non-negative (one byte 0xFF is 255,
// not -1). Only a full four-byte field reaches the sign bit, and the 32-bit
// shift/or arithmetic below produces the negative value by itself.

        let value = 0;
        let byte_nr = 0;
        while (byte_nr < size) {
            value = (value << 8) | buf[off + byte_nr];
            byte_nr += 1;
        }
        return {
            value,
            off: off + size
        };
    }
    if (type === 9) {

// Uint64 as a Number: exact only for values below 2^53.

        let value = 0;
        let byte_nr = 0;
        while (byte_nr < size) {
            value = value * 256 + buf[off + byte_nr];
            byte_nr += 1;
        }
        return {
            value,
            off: off + size
        };
    }

// Type 10 (Uint128) is deliberately not decoded: it would need BigInt, which
// JSLint does not permit. It falls through to the error at the bottom.

    if (type === 11) {

// Array: 'size' nested values.

        const value = [];
        let item_nr = 0;
        while (item_nr < size) {
            const item = decode(buf, dv, off, data_start);
            off = item.off;
            value.push(item.value);
            item_nr += 1;
        }
        return {value, off};
    }
    if (type === 14) {

// Boolean: there is no payload, the size field itself is the value.

        return {
            value: size !== 0,
            off
        };
    }
    if (type === 15) {

// Float (IEEE 754, big-endian, 4 bytes).

        return {
            value: dv.getFloat32(off, false),
            off: off + 4
        };
    }
    throw new Error("Unknown MaxMind DB data type: " + type);
}
// Locate the metadata section by searching 'buf' from the end towards the
// start for the last occurrence of metadata_marker.
//
// Returns the offset of the first byte after the marker.
// Throws an Error when the marker does not occur in 'buf'.
function find_metadata_offset(buf) {
    const marker_length = metadata_marker.length;

    function marker_at(start) {
        return metadata_marker.every(function (expected, nr) {
            return buf[start + nr] === expected;
        });
    }

    for (let start = buf.length - marker_length; start >= 0; start -= 1) {
        if (marker_at(start)) {
            return start + marker_length;
        }
    }
    throw new Error("Metadata marker not found");
}
| // Search tree. Each node holds two records (left for bit=0, right for bit=1). | |
| // Node layout per record_size: | |
| // 24 bits: 3 bytes left | 3 bytes right (node = 6 bytes) | |
| // 28 bits: 3 bytes + high nibble of middle byte (node = 7 bytes) | |
| // 32 bits: 4 bytes left | 4 bytes right (node = 8 bytes) | |
// Read one record of a search tree node.
//
//  buf          Bytes of the database; the tree starts at offset 0.
//  node_num     Index of the node to read.
//  bit          0 selects the left record, anything else the right record.
//  node_size    Size of one node in bytes.
//  record_size  Record width in bits: 24, 28 or 32.
//
// Returns the record as an unsigned integer.
// Throws an Error for any other record size.
function read_node(buf, node_num, bit, node_size, record_size) {
    const base = node_num * node_size;
    const want_right = bit !== 0;

    if (record_size === 24) {
        const at = (
            want_right
            ? base + 3
            : base
        );
        return ((buf[at] << 16) | (buf[at + 1] << 8) | buf[at + 2]) >>> 0;
    }

    if (record_size === 28) {

// The byte at base + 3 is shared: its high nibble holds the top four bits of
// the left record, its low nibble the top four bits of the right record.

        const shared = buf[base + 3];
        const top = (
            want_right
            ? shared & 0x0F
            : shared >> 4
        );
        const at = (
            want_right
            ? base + 4
            : base
        );
        return (
            (top << 24)
            | (buf[at] << 16)
            | (buf[at + 1] << 8)
            | buf[at + 2]
        ) >>> 0;
    }

    if (record_size === 32) {
        const at = (
            want_right
            ? base + 4
            : base
        );
        return (
            (buf[at] << 24)
            | (buf[at + 1] << 16)
            | (buf[at + 2] << 8)
            | buf[at + 3]
        ) >>> 0;
    }

    throw new Error("Unsupported record size " + record_size);
}
// Parse a dotted-decimal IPv4 address such as "8.8.8.8".
//
// Returns a Uint8Array holding the four octets in network order.
// Throws an Error ("Invalid IPv4: ...") unless the text consists of exactly
// four parts, each made of one to three ASCII digits with a value of at most
// 255.
//
// Each part is checked as text before conversion, because Number() alone
// would accept "" (as 0), surrounding whitespace, hex ("0x10") and exponent
// ("1e1") notation.
function parse_ipv4(ip) {
    const parts = ip.split(".");
    const valid = parts.length === 4 && parts.every(function (part) {
        return (/^[0-9]{1,3}$/).test(part) && Number(part) <= 255;
    });
    if (!valid) {
        throw new Error("Invalid IPv4: " + ip);
    }
    return Uint8Array.from(parts, function (part) {
        return Number(part);
    });
}
// Parse the text form of an IPv6 address.
//
// Accepted forms:
//  - eight groups of one to four hex digits ("1:2:3:4:5:6:7:8"),
//  - the same with one "::" standing for one or more all-zero groups,
//  - either of the above with the last 32 bits written as a dotted-decimal
//    IPv4 address ("::ffff:1.2.3.4").
//
// Returns a Uint8Array holding the 16 address bytes in network order.
// Throws an Error ("Invalid IPv6: ...") for anything else: non-hex or overlong
// groups, empty groups, more than one "::", or the wrong number of groups.
function parse_ipv6(ip) {
    const hextet_rx = /^[0-9a-fA-F]{1,4}$/;
    const octet_rx = /^[0-9]{1,3}$/;

    function split_groups(text) {
        return (
            text === ""
            ? []
            : text.split(":")
        );
    }

    const halves = ip.split("::");
    const compressed = halves.length === 2;
    const head = split_groups(halves[0]);
    const tail = (
        compressed
        ? split_groups(halves[1])
        : []
    );

// An embedded IPv4 address may only be the final group of the address.
// Rewrite it as the two hex groups it stands for. A malformed quad is left in
// place and is rejected by the hex check below.

    const closing = (
        compressed
        ? tail
        : head
    );
    const last = closing[closing.length - 1];
    if (last !== undefined && last.includes(".")) {
        const octets = last.split(".");
        const quad_ok = octets.length === 4 && octets.every(function (octet) {
            return octet_rx.test(octet) && Number(octet) <= 255;
        });
        if (quad_ok) {
            closing.splice(
                -1,
                1,
                (Number(octets[0]) * 256 + Number(octets[1])).toString(16),
                (Number(octets[2]) * 256 + Number(octets[3])).toString(16)
            );
        }
    }

// With "::" at most seven groups may be written out, since the "::" stands
// for at least one group. Without it, exactly eight are required.

    const explicit = head.length + tail.length;
    const well_formed = (
        halves.length <= 2
        && head.concat(tail).every(function (group) {
            return hextet_rx.test(group);
        })
        && (
            compressed
            ? explicit <= 7
            : explicit === 8
        )
    );
    if (!well_formed) {
        throw new Error("Invalid IPv6: " + ip);
    }

// The array starts zero-filled, so only the written-out groups are stored:
// the head from the front, the tail right-aligned at the end.

    const bytes = new Uint8Array(16);

    function store(group, group_nr) {
        const hextet = Number.parseInt(group, 16);
        bytes[group_nr * 2] = hextet >> 8;
        bytes[group_nr * 2 + 1] = hextet & 0xFF;
    }

    head.forEach(function (group, nr) {
        store(group, nr);
    });
    const tail_start = 8 - tail.length;
    tail.forEach(function (group, nr) {
        store(group, tail_start + nr);
    });
    return bytes;
}
// Walk the search tree, following one bit of the address per step, most
// significant bit of the first byte first.
//
//  buf         Bytes of the database, handed through to read_node.
//  ip_bytes    Address bytes to follow.
//  start_node  Node at which the walk begins.
//  node_count  Number of nodes in the tree; a record value at or above this
//              is not a node and ends the walk.
//  node_size   Size of one node in bytes, handed through to read_node.
//  rec_size    Record width in bits, handed through to read_node.
//
// Returns {node, depth}: the last record value reached and the number of
// address bits consumed to reach it.
function traverse(buf, ip_bytes, start_node, node_count, node_size, rec_size) {
    const bit_limit = 8 * ip_bytes.length;
    let current = start_node;
    let depth = 0;
    for (; depth < bit_limit && current < node_count; depth += 1) {
        const octet = ip_bytes[Math.floor(depth / 8)];
        const shift = 7 - (depth % 8);
        const branch = (octet >> shift) & 1;
        current = read_node(buf, current, branch, node_size, rec_size);
    }
    return {node: current, depth};
}
// Open a MaxMind DB that is held entirely in memory.
//
//  db_bytes  Uint8Array with the complete contents of the .mmdb file.
//
// Returns a frozen object {lookup, meta}:
//  meta        The decoded metadata map of the database.
//  lookup(ip)  Looks up an IPv4 or IPv6 address given as text. Returns
//              {data, depth} when a record was found, or {depth} when there
//              is none; 'depth' is the number of address bits consumed.
//              Throws an Error for malformed addresses and for an IPv6
//              address looked up in an IPv4-only database.
//
// Throws an Error when the metadata marker cannot be found in 'db_bytes'.
function make_maxmind(db_bytes) {
    const dv = new DataView(
        db_bytes.buffer,
        db_bytes.byteOffset,
        db_bytes.byteLength
    );
    const metadata_offset = find_metadata_offset(db_bytes);
    const meta = decode(db_bytes, dv, metadata_offset, metadata_offset).value;
    const {ip_version, node_count, record_size} = meta;

// A node holds two records, so its size in bytes is record_size * 2 / 8.

    const node_size = Math.ceil((record_size * 2) / 8);
    const tree_size = node_count * node_size;
    const data_start = tree_size + data_separator_size;

// In an IPv6 tree the IPv4 addresses are found below 96 leading zero bits.
// Follow those once here so that every IPv4 lookup can start from that node.

    let ipv4_start = 0;
    if (ip_version === 6) {
        let skipped = 0;
        while (skipped < 96 && ipv4_start < node_count) {
            ipv4_start = read_node(
                db_bytes,
                ipv4_start,
                0,
                node_size,
                record_size
            );
            skipped += 1;
        }
    }

    function lookup(ip) {
        const v6 = ip.includes(":");
        const ip_bytes = (
            v6
            ? parse_ipv6(ip)
            : parse_ipv4(ip)
        );

// An IPv4-only tree is at most 32 levels deep. Walking it with 128 bits would
// land on an unrelated record, so refuse instead of returning wrong data.

        if (v6 && ip_version !== 6) {
            throw new Error(
                "Cannot look up an IPv6 address in an IPv4-only database: "
                + ip
            );
        }
        const start = (
            (ip_version === 6 && !v6)
            ? ipv4_start
            : 0
        );
        const {node, depth} = traverse(
            db_bytes,
            ip_bytes,
            start,
            node_count,
            node_size,
            record_size
        );

// node === node_count: empty record, the address is not in the database.
// node < node_count: every bit was consumed without reaching a data record.

        if (node <= node_count) {
            return {depth};
        }

// node > node_count: the record encodes an offset into the data section.

        const off = data_start + (node - node_count - data_separator_size);
        const data = decode(db_bytes, dv, off, data_start).value;
        return {data, depth};
    }

    return Object.freeze({lookup, meta});
}
// Self-test, run only when this module is the program's entry point. It
// downloads MaxMind's public GeoLite2 City test database and checks a few
// lookups. Any failure rejects the promise chain with a "FAIL: ..." error.
if (import.meta.main) {
    fetch(
        "https://raw.githubusercontent.com"
        + "/maxmind/MaxMind-DB/refs/heads/main"
        + "/test-data/GeoLite2-City-Test.mmdb"
    ).then(function (response) {

// Without this check an HTTP error page would be parsed as a database and
// fail later with a misleading "Metadata marker not found".

        if (!response.ok) {
            throw new Error("FAIL: download failed, HTTP " + response.status);
        }
        return response.arrayBuffer();
    }).then(function (array_buffer) {
        const db_bytes = new Uint8Array(array_buffer);
        const maxmind = make_maxmind(db_bytes);

        const a = maxmind.lookup("2a02:d100::0001");
        globalThis.console.log(a);
        if (a.data.location.time_zone !== "Europe/Warsaw") {
            throw new Error("FAIL: wrong time zone");
        }
        if (a.depth !== 29) {
            throw new Error("FAIL: bad depth");
        }

        const b = maxmind.lookup("8.8.8.8");
        if (b.data !== undefined) {
            throw new Error("FAIL: unexpected data for 8.8.8.8");
        }

// Call the real lookup function and insist on the validation error. Calling
// a method that does not exist would also throw, which would make this check
// pass without testing anything.

        let rejected = false;
        try {
            maxmind.lookup("8.8.8.500");
        } catch (error) {
            rejected = String(error.message).startsWith("Invalid IPv4");
        }
        if (!rejected) {
            throw new Error("FAIL: accepted invalid IPv4");
        }

        if (!maxmind.meta.languages.includes("en")) {
            throw new Error("FAIL: bad metadata");
        }
        globalThis.console.log("Pass");
    });
}
| export default Object.freeze(make_maxmind); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment