Last active
February 20, 2026 10:02
-
-
Save jweinst1/9dd64e8a8f206d81b14d8c9117803ea3 to your computer and use it in GitHub Desktop.
region specific pattern of quantization for floats
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <cstdint> | |
| #include <cstring> | |
| #include <cstdio> | |
| #include <random> | |
| #include <limits> | |
| #include <algorithm> | |
| #include <array> | |
| #include <vector> | |
| #include <cmath> | |
| #include <map> | |
| #include <bitset> | |
| #include <iostream> | |
| static constexpr size_t dimensionLength = 64; | |
| static constexpr size_t bitDimLengt = dimensionLength / 64; | |
| struct FloatVec { | |
| std::array<float, dimensionLength> data{}; | |
| bool operator==(const FloatVec& other) const { | |
| return data == other.data; | |
| } | |
| void print() const { | |
| putc('\n', stdout); | |
| for (int i = 0; i < dimensionLength; ++i) | |
| { | |
| printf("| %f | ", data[i]); | |
| } | |
| putc('\n', stdout); | |
| } | |
| static std::vector<FloatVec> makeRandVec(size_t amount) { | |
| std::random_device rd; | |
| std::mt19937 gen(rd()); | |
| float min = -1.0f; | |
| float max = 1.0f; | |
| std::uniform_real_distribution<float> dis( | |
| std::nextafter(min, max), // Just above -1.0 | |
| max // Just below 1.0 (distribution is naturally exclusive of max) | |
| ); | |
| std::vector<FloatVec> fvs; | |
| for (int i = 0; i < amount; ++i) | |
| { | |
| FloatVec fv; | |
| for (int j = 0; j < dimensionLength; ++j) | |
| { | |
| fv.data[j] = dis(gen); | |
| } | |
| fvs.push_back(fv); | |
| } | |
| return fvs; | |
| } | |
| uint64_t quantize() const { | |
| uint64_t result = 0; | |
| for (int i = 0; i < dimensionLength; ++i) | |
| { | |
| uint64_t posBit = !std::signbit(data[i]) ? 1 : 0; | |
| result |= posBit << i; | |
| } | |
| return result; | |
| } | |
| }; | |
| static void printRegions(uint64_t qn) { | |
| for (int i = 0; i < 64; i += 4) | |
| { | |
| if (((qn >> i) & 0xf) == 0xf) { | |
| printf("1 "); | |
| } else { | |
| printf("0 "); | |
| } | |
| } | |
| putc('\n', stdout); | |
| } | |
| static uint16_t quantize16(uint64_t qn) { | |
| uint16_t key = 0; | |
| int j = 0; | |
| for (int i = 0; i < 64; i += 4) | |
| { | |
| if (((qn >> i) & 0xf) == 0xf) { | |
| key |= uint16_t{1} << j; | |
| } | |
| ++j; | |
| } | |
| return key; | |
| } | |
| static uint32_t quantize32(uint64_t qn) { | |
| uint32_t key = 0; | |
| int j = 0; | |
| for (int i = 0; i < 64; i += 2) | |
| { | |
| if (((qn >> i) & 0b11) == 0b11) { | |
| key |= uint16_t{1} << j; | |
| } | |
| ++j; | |
| } | |
| return key; | |
| } | |
| float calculateEuclideanDistance(const FloatVec& a, const FloatVec& b) { | |
| float sum = 0.0f; | |
| for (int i = 0; i < dimensionLength; ++i) { | |
| float diff = a.data[i] - b.data[i]; | |
| sum += diff * diff; | |
| } | |
| return std::sqrt(sum); | |
| } | |
| static FloatVec calcMeanWithVec(const std::vector<FloatVec>& vecs) { | |
| FloatVec fv; | |
| for (const auto& vec: vecs) { | |
| for (int i = 0; i < dimensionLength; ++i) | |
| { | |
| fv.data[i] += vec.data[i]; | |
| } | |
| } | |
| for (int i = 0; i < dimensionLength; ++i) | |
| { | |
| fv.data[i] = fv.data[i] / (float)vecs.size(); | |
| } | |
| return fv; | |
| } | |
| static FloatVec findNearestNeighbor(const std::vector<FloatVec>& vecs, const FloatVec& query) { | |
| float closestDist = std::numeric_limits<float>::max(); | |
| size_t closestVec = (size_t)-1; | |
| for (size_t i = 0; i < vecs.size(); ++i) | |
| { | |
| float dist = calculateEuclideanDistance(query, vecs[i]); | |
| if (closestDist > dist) { | |
| closestDist = dist; | |
| closestVec = i; | |
| } | |
| } | |
| return vecs[closestVec]; | |
| } | |
| static constexpr size_t testSampleSize = 1024 * 100; | |
| int main(int argc, char const *argv[]) | |
| { | |
| std::array<std::vector<FloatVec>, 16> popCounted16{}; | |
| std::array<std::vector<FloatVec>, 32> popCounted32{}; | |
| std::array<std::vector<FloatVec>, 64> popCounted64{}; | |
| std::map<uint16_t, std::vector<FloatVec>> mappedKeys; | |
| std::map<uint32_t, std::vector<FloatVec>> mappedKeys32; | |
| auto vecs = FloatVec::makeRandVec(50000); | |
| for (const auto& vec : vecs) { | |
| uint64_t compressed = vec.quantize(); | |
| uint16_t superComp = quantize16(compressed); | |
| uint32_t superComp32 = quantize32(compressed); | |
| mappedKeys[superComp].push_back(vec); | |
| mappedKeys32[superComp32].push_back(vec); | |
| popCounted16[ __builtin_popcount(superComp) ].push_back(vec); | |
| popCounted32[__builtin_popcount(superComp32)].push_back(vec); | |
| popCounted64[__builtin_popcountll(compressed)].push_back(vec); | |
| } | |
| for (int i = 0; i < 16; ++i) | |
| { | |
| printf("16{%d} - size %zu\n", i, popCounted16[i].size()); | |
| } | |
| for (int i = 0; i < 32; ++i) | |
| { | |
| printf("32{%d} - size %zu\n",i, popCounted32[i].size()); | |
| } | |
| for (int i = 0; i < 64; ++i) | |
| { | |
| printf("64{%d} - size %zu\n",i, popCounted64[i].size()); | |
| } | |
| auto vecQuery = FloatVec::makeRandVec(1000); | |
| puts("NEAREST TEST START__________"); | |
| for (const auto& q : vecQuery) | |
| { | |
| FloatVec baseNearest = findNearestNeighbor(vecs, q); | |
| uint64_t compressed = q.quantize(); | |
| uint16_t superComp = quantize16(compressed); | |
| int index16 = __builtin_popcount(superComp); | |
| FloatVec quantNearest = findNearestNeighbor(popCounted16[index16], q); | |
| printf("Nearest Test index:%d result:%s\n", index16, baseNearest == quantNearest ? "PASS" : "FAIL"); | |
| } | |
| puts("NEAREST TEST END__________"); | |
| FloatVec fv1 = calcMeanWithVec(popCounted16[0]); | |
| FloatVec fvClose = calcMeanWithVec(popCounted16[1]); | |
| FloatVec fv2 = calcMeanWithVec(popCounted16[5]); | |
| printf("Distance16 0 5 is %f\n", calculateEuclideanDistance(fv1, fv2)); | |
| printf("Distance inner 5 is %f\n", calculateEuclideanDistance(popCounted16[5][0], popCounted16[5][1])); | |
| printf("Distance16 0 1 is %f\n", calculateEuclideanDistance(fv1, fvClose)); | |
| size_t first32WithV = 0; | |
| size_t first32WithVNext = 0; | |
| size_t last32WithV = 0; | |
| for (int i = 0; i < 32; ++i) | |
| { | |
| if (popCounted32[i].size() > 0) { | |
| first32WithV = i; | |
| first32WithVNext = i + 1; | |
| break; | |
| } | |
| } | |
| for (int i = 31; i >= 0; --i) | |
| { | |
| if (popCounted32[i].size() > 0) { | |
| last32WithV = i; | |
| break; | |
| } | |
| } | |
| /* | |
| Distance16 0 5 is 1.931163 | |
| Distance16 0 1 is 0.300208 | |
| Distance16 inner 00 6.095141 | |
| first last 32 dist mean 1 and 18 is 6.006310 | |
| NEXT first 32 dist mean 1 and 2 is 2.812121 | |
| */ | |
| FloatVec v32First = calcMeanWithVec(popCounted32[first32WithV]); | |
| FloatVec v32FirstNext = calcMeanWithVec(popCounted32[first32WithVNext]); | |
| FloatVec v32Last = calcMeanWithVec(popCounted32[last32WithV]); | |
| printf("first last 32 dist mean %zu and %zu is %f\n", first32WithV, last32WithV, calculateEuclideanDistance(v32First, v32Last)); | |
| printf("NEXT first 32 dist mean %zu and %zu is %f\n", first32WithV, first32WithVNext, calculateEuclideanDistance(v32First, v32FirstNext)); | |
| // 64 start | |
| size_t first64WithV = 0; | |
| size_t first64WithVNext = 0; | |
| size_t last64WithV = 0; | |
| for (int i = 0; i < 64; ++i) | |
| { | |
| if (popCounted64[i].size() > 0) { | |
| first64WithV = i; | |
| first64WithVNext = i + 1; | |
| break; | |
| } | |
| } | |
| for (int i = 63; i >= 0; --i) | |
| { | |
| if (popCounted64[i].size() > 0) { | |
| last64WithV = i; | |
| break; | |
| } | |
| } | |
| FloatVec v64First = calcMeanWithVec(popCounted64[first64WithV]); | |
| FloatVec v64FirstNext = calcMeanWithVec(popCounted64[first64WithVNext]); | |
| FloatVec v64Last = calcMeanWithVec(popCounted64[last64WithV]); | |
| printf("first last 64 dist mean %zu and %zu is %f\n", first64WithV, last64WithV, calculateEuclideanDistance(v64First, v64Last)); | |
| printf("NEXT first 64 dist mean %zu and %zu is %f\n", first64WithV, first64WithVNext, calculateEuclideanDistance(v64First, v64FirstNext)); | |
| printf("inner 64 %f\n", calculateEuclideanDistance(popCounted64[23][0], popCounted64[23][1])); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment