Skip to content

Instantly share code, notes, and snippets.

@CEXT-Dan
Created November 21, 2025 13:18
Show Gist options
  • Select an option

  • Save CEXT-Dan/50437623c09a5afd6eaf1cb34acd1cdc to your computer and use it in GitHub Desktop.

Select an option

Save CEXT-Dan/50437623c09a5afd6eaf1cb34acd1cdc to your computer and use it in GitHub Desktop.
Parse PNEZD file
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
// Windows Header Files
#include <windows.h>
#include "tchar.h"
#include <errno.h>
#include <algorithm>
#include <array>
#include <cassert>
#include <cctype>
#include <charconv>
#include <chrono>
#include <climits>
#include <cstdint>
#include <cstring>
#include <exception>
#include <filesystem>
#include <format>
#include <iostream>
#include <mutex>
#include <string>
#include <string_view>
#include <thread>
#include <variant>
#include <vector>
//-==-==-===-=-=-=-==-=-=-=-=--=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=
// PerfTimer
/// Simple stopwatch: the clock starts when the object is constructed and
/// end() reports the time elapsed since then.
class PerfTimer
{
    using Clock = std::chrono::high_resolution_clock;

    Clock::time_point t1; // captured at construction
    Clock::time_point t2; // captured by the most recent end() call

public:
    PerfTimer();
    ~PerfTimer() = default;

    /// Samples the clock and returns a "\nDone! <seconds> seconds" message.
    std::string end();
};

inline PerfTimer::PerfTimer()
    : t1(Clock::now())
{
}

inline std::string PerfTimer::end()
{
    t2 = Clock::now();
    // Converting to a floating-point duration gives fractional seconds.
    const std::chrono::duration<double> elapsedSeconds = t2 - t1;
    return std::format("\nDone! {} seconds", elapsedSeconds.count());
}
/*
* strtod.c --
*
* Source code for the "strtod" library procedure.
*
* Copyright (c) 1988-1993 The Regents of the University of California.
* Copyright (c) 1994 Sun Microsystems, Inc.
*
* Permission to use, copy, modify, and distribute this
* software and its documentation for any purpose and without
* fee is hereby granted, provided that the above copyright
* notice appear in all copies. The University of California
* makes no representations about the suitability of this
* software for any purpose. It is provided "as is" without
* express or implied warranty.
*
* RCS: @(#) $Id: strtod.c,v 1.3 2000/02/17 07:11:22 matz Exp $
*/
//-==-==-===-=-=-=-==-=-=-=-=--=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=
// constants for strtodd
static constexpr int maxExponent = 511; /* Largest possible base 10 exponent. Any
                                         * exponent larger than this will already
                                         * produce underflow or overflow, so there's
                                         * no need to worry about additional digits.
                                         */
static constexpr double powersOf10[] = { /* Table giving binary powers of 10. Entry */
    10.,     /* is 10^2^i. Used to convert decimal */
    100.,    /* exponents into floating-point numbers. */
    1.0e4,
    1.0e8,
    1.0e16,
    1.0e32,
    1.0e64,
    1.0e128,
    1.0e256
};

/*
 * strtodd --
 *
 * Converts the leading decimal floating-point text of `string` to a double.
 * Accepted form: optional whitespace, optional sign, digits with at most one
 * '.', optional exponent ('e'/'E', optional sign, digits).
 *
 * string  Text to parse. Reading stops at the first character that cannot be
 *         part of the number, so the text must contain such a character
 *         (a NUL, a field delimiter, ...) before the end of the buffer.
 * endPtr  If non-null, receives a pointer to the first character after the
 *         parsed number, or `string` itself when no number was found.
 *
 * Returns the parsed value (0.0 when nothing could be parsed). Only the first
 * 18 mantissa digits contribute to the value. When the decimal exponent
 * exceeds maxExponent, errno is set to ERANGE.
 */
static double strtodd(const char* string, char** endPtr)
{
    // <cctype> functions are undefined for negative char values (e.g. UTF-8
    // bytes), so go through unsigned char; digits are tested by range so the
    // later `c - '0'` arithmetic is always valid regardless of locale.
    const auto isSpace = [](char ch) { return std::isspace(static_cast<unsigned char>(ch)) != 0; };
    const auto isDigit = [](char ch) { return ch >= '0' && ch <= '9'; };

    const char* p = string;
    while (isSpace(*p))
    {
        ++p;
    }

    bool negative = false;
    if (*p == '-')
    {
        negative = true;
        ++p;
    }
    else if (*p == '+')
    {
        ++p;
    }

    // Count the mantissa characters (digits plus at most one decimal point)
    // and remember where the decimal point sits.
    int decPt = -1;
    int mantSize = 0;
    for (;; ++mantSize)
    {
        const char c = *p;
        if (!isDigit(c))
        {
            if (c != '.' || decPt >= 0)
            {
                break;
            }
            decPt = mantSize;
        }
        ++p;
    }

    const char* const pExp = p; // first character after the mantissa
    p -= mantSize;              // rewind to the first mantissa character
    if (decPt < 0)
    {
        decPt = mantSize;
    }
    else
    {
        mantSize -= 1; // the decimal point is not a digit
    }

    // Digits beyond the 18th are dropped; fracExp compensates their scale.
    int fracExp = 0;
    if (mantSize > 18)
    {
        fracExp = decPt - 18;
        mantSize = 18;
    }
    else
    {
        fracExp = decPt - mantSize;
    }

    double fraction = 0.0;
    if (mantSize == 0)
    {
        // No digits at all: nothing was converted.
        p = string;
    }
    else
    {
        // Reads the next mantissa digit, stepping over the decimal point.
        const auto nextDigit = [&p]() -> int {
            char c = *p++;
            if (c == '.')
            {
                c = *p++;
            }
            return c - '0';
        };

        // Accumulate in two 9-digit integers so neither can overflow an int.
        int frac1 = 0;
        for (; mantSize > 9; --mantSize)
        {
            frac1 = 10 * frac1 + nextDigit();
        }
        int frac2 = 0;
        for (; mantSize > 0; --mantSize)
        {
            frac2 = 10 * frac2 + nextDigit();
        }
        fraction = (1.0e9 * frac1) + frac2;

        // Optional exponent. It is consumed only when at least one digit
        // follows, so input such as "7e" parses as 7 and stops at the 'e'.
        p = pExp;
        int exp = 0;
        bool expNegative = false;
        if (*p == 'E' || *p == 'e')
        {
            const char* q = p + 1;
            bool signSeen = false;
            if (*q == '-')
            {
                signSeen = true;
                ++q;
            }
            else if (*q == '+')
            {
                ++q;
            }
            if (isDigit(*q))
            {
                expNegative = signSeen;
                constexpr int accumLimit = 100000000; // keeps exp * 10 + 9 inside int
                while (isDigit(*q))
                {
                    if (exp < accumLimit)
                    {
                        exp = exp * 10 + (*q - '0');
                    }
                    ++q;
                }
                p = q;
            }
        }

        exp = expNegative ? (fracExp - exp) : (fracExp + exp);
        const bool divide = exp < 0;
        if (divide)
        {
            exp = -exp;
        }
        if (exp > maxExponent)
        {
            exp = maxExponent;
            errno = ERANGE;
        }

        // Build 10^exp from the binary-power table, one bit of exp at a time.
        double dblExp = 1.0;
        for (const double* d = powersOf10; exp != 0; exp >>= 1, ++d)
        {
            if (exp & 1)
            {
                dblExp *= *d;
            }
        }
        fraction = divide ? (fraction / dblExp) : (fraction * dblExp);
    }

    if (endPtr != nullptr)
    {
        *endPtr = const_cast<char*>(p);
    }
    return negative ? -fraction : fraction;
}
//-==-==-===-=-=-=-==-=-=-=-=--=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=
// utf8_to_wstr
#pragma warning(push)
#pragma warning(disable: 4267)
/// Converts UTF-8 text to a UTF-16 std::wstring via MultiByteToWideChar.
///
/// @param str8  UTF-8 bytes; need not be NUL-terminated.
/// @return The converted text, or an empty string when the input is empty,
///         is longer than INT_MAX bytes (the API takes an int length), or
///         the conversion fails.
/// @note The function is noexcept, so an allocation failure terminates the
///       program instead of propagating.
static std::wstring utf8_to_wstr(const std::string_view str8) noexcept
{
    // A length that does not fit in int would be truncated by the cast; a
    // truncated value of -1 would even make the API scan for a NUL terminator.
    if (str8.empty() || str8.size() > static_cast<size_t>(INT_MAX))
        return {};

    const int srcLen = static_cast<int>(str8.size());
    // First call: ask how many wide characters the result needs.
    const int count = MultiByteToWideChar(CP_UTF8, 0, str8.data(), srcLen, nullptr, 0);
    if (count <= 0)
        return {};

    std::wstring wstr(static_cast<size_t>(count), L'\0');
    // Second call: perform the conversion into the sized buffer.
    const int written = MultiByteToWideChar(CP_UTF8, 0, str8.data(), srcLen, wstr.data(), count);
    // Keep only what was actually produced (nothing if the call failed).
    wstr.resize(written > 0 ? static_cast<size_t>(written) : 0);
    return wstr;
}
#pragma warning(pop)
//-==-==-===-=-=-=-==-=-=-=-=--=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=
// FileHnd
class FileHnd
{
public:
FileHnd(HANDLE h)
:_h(h)
{
}
~FileHnd()
{
if (CloseHandle(_h) == FALSE)
assert(0);
}
HANDLE hnd() const
{
return _h;
}
private:
HANDLE _h;
};
//-==-==-===-=-=-=-==-=-=-=-=--=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=
// FileMap
/// Owning wrapper for a view returned by MapViewOfFile: the view is unmapped
/// when the wrapper is destroyed.
///
/// When constructed from a null pointer (a failed MapViewOfFile call) the
/// wrapper represents an empty range: size() is 0, beginf()/endf() are null,
/// view() is empty, and nothing is unmapped on destruction.
class FileMap
{
public:
    /// @param fmap  Base address of the mapped view, or null on failure.
    /// @param len   Number of readable bytes starting at fmap.
    FileMap(LPVOID fmap, size_t len)
        : _fmap(static_cast<char*>(fmap)),
          _fend(fmap ? static_cast<char*>(fmap) + len : nullptr),
          _len(fmap ? len : 0)
    {
    }

    ~FileMap()
    {
        // UnmapViewOfFile(NULL) fails, so skip it for a view that never existed.
        if (_fmap != nullptr && UnmapViewOfFile(_fmap) == FALSE)
            assert(0);
    }

    // A copy would unmap the same view twice.
    FileMap(const FileMap&) = delete;
    FileMap& operator=(const FileMap&) = delete;

    /// First byte of the view.
    char* beginf() const
    {
        return _fmap;
    }

    /// One past the last byte of the view.
    char* endf() const
    {
        return _fend;
    }

    /// Number of bytes in the view.
    size_t size() const
    {
        return _len;
    }

    /// Non-owning view of the bytes; valid only while this object is alive.
    std::string_view view() const
    {
        return std::string_view(_fmap, _len);
    }

private:
    char* _fmap = nullptr;
    char* _fend = nullptr;
    size_t _len = 0;
};
//-==-==-===-=-=-=-==-=-=-=-=--=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=
// chunk the file into line-aligned segments
//
// Splits `fv` into consecutive pieces meant to be parsed in parallel.
//  - Every segment starts at the beginning of `fv` or right after a '\n'.
//  - Every segment ends at the end of `fv` or just before a '\n' (that '\n'
//    belongs to no segment), so no line is ever split across two segments.
//  - The segment count is at most the hardware thread count (4 when it is
//    unknown) and at most fv.size() / min_chunk_size, so small inputs are not
//    shredded into many tiny pieces that each cost a thread.
//  - An input too small to split is returned as a single segment equal to `fv`.
// The returned views alias `fv`'s storage and are valid only as long as it is.
static std::vector<std::string_view> makefileSegments(const std::string_view fv)
{
    constexpr size_t min_chunk_size = 16 * 1024; // 16 KB, best?

    const size_t detected = std::thread::hardware_concurrency(); // 0 means "unknown"
    const size_t hw_threads = detected ? detected : 4;
    const size_t chunks_that_fit = fv.size() / min_chunk_size;
    // Plain comparison rather than std::min: <windows.h> may define a min macro.
    const size_t seg_count = (chunks_that_fit < hw_threads) ? chunks_that_fit : hw_threads;

    std::vector<std::string_view> segments;
    if (seg_count <= 1)
    {
        segments.push_back(fv);
        return segments;
    }
    segments.reserve(seg_count);

    const char* const data = fv.data();
    const char* const endp = data + fv.size();
    const size_t approx_chunk = fv.size() / seg_count;

    // First '\n' at or after `from`, or endp when there is none.
    const auto newline_at_or_after = [endp](const char* from) -> const char* {
        const void* hit = std::memchr(from, '\n', static_cast<size_t>(endp - from));
        return hit ? static_cast<const char*>(hit) : endp;
    };

    for (size_t i = 0; i < seg_count; ++i)
    {
        // Segment i starts right after the newline that ended segment i - 1.
        const char* seg_begin = data;
        if (i != 0)
        {
            const char* const nl = newline_at_or_after(data + i * approx_chunk);
            seg_begin = (nl < endp) ? nl + 1 : endp;
        }

        // The last segment takes everything that is left; the others stop at
        // the first newline at or after their nominal boundary.
        const char* const seg_end = (i + 1 == seg_count)
            ? endp
            : newline_at_or_after(data + (i + 1) * approx_chunk);

        // A line longer than a chunk can swallow a whole segment; skip it.
        if (seg_begin < seg_end)
        {
            segments.emplace_back(seg_begin, static_cast<size_t>(seg_end - seg_begin));
        }
    }
    return segments;
}
/// One parsed record of a PNEZD file.
struct PNEZD
{
    /// Record identifier: a number, or text when the id field is not numeric.
    std::variant<size_t, std::wstring> _id{};
    /// Coordinates; parse_pnezd_range stores the "e" field in [0], the "n"
    /// field in [1] and the "z" field in [2]. All zero by default.
    std::array<double, 3> point{};
    /// Free-text description (last field of the record).
    std::wstring _description{};
};

/// Sequence of parsed records.
using PNEZDArray = std::vector<PNEZD>;
/// Parses every complete record in `range` and appends it to `out`.
///
/// A record is one text line of the form  id <dlm> n <dlm> e <dlm> z <dlm> description.
/// Lines may end with "\r\n", "\n" or "\r"; a final line without a line ending
/// is parsed too. Empty lines and lines with fewer than four delimiters are
/// skipped, and a field is never looked up beyond the end of its own line.
///
/// The id is stored as a number when the whole id field is an unsigned decimal
/// integer that fits in size_t, otherwise as text. The n, e and z fields are
/// stored in point[1], point[0] and point[2] respectively.
///
/// @param range  Text to parse; it is not modified.
/// @param dlm    Field delimiter character.
/// @param out    Receives the parsed records (appended).
static void parse_pnezd_range(std::string_view range, char dlm, PNEZDArray& out)
{
    constexpr size_t npos = std::string_view::npos;
    const size_t len = range.size();
    size_t pos = 0;
    while (pos < len)
    {
        // Isolate the current line. Treating '\r' and '\n' each as a line
        // break makes the '\n' of a "\r\n" pair show up as an empty line,
        // which is skipped below.
        size_t eol = range.find_first_of("\r\n", pos);
        if (eol == npos)
            eol = len;
        const std::string_view line = range.substr(pos, eol - pos);
        pos = eol + 1;
        if (line.empty())
            continue;

        // Locate the four delimiters that separate the five fields.
        std::array<size_t, 4> cut{};
        size_t search_from = 0;
        bool complete = true;
        for (size_t& c : cut)
        {
            c = line.find(dlm, search_from);
            if (c == npos)
            {
                complete = false;
                break;
            }
            search_from = c + 1;
        }
        if (!complete) [[unlikely]]
            continue; // malformed line

        PNEZD pnezd{};

        // p: numeric only when the entire field converts; anything else
        // (including an empty field) is kept verbatim as text.
        const std::string_view id_str = line.substr(0, cut[0]);
        const char* const id_last = id_str.data() + id_str.size();
        size_t id_num = 0;
        const auto [id_stop, id_err] = std::from_chars(id_str.data(), id_last, id_num);
        if (id_err == std::errc{} && id_stop == id_last)
            pnezd._id = id_num;
        else
            pnezd._id = utf8_to_wstr(id_str);

        // n, e, z: strtodd stops at the first non-numeric character, which is
        // at the latest the delimiter that closes the field.
        pnezd.point[1] = strtodd(line.data() + cut[0] + 1, nullptr);
        pnezd.point[0] = strtodd(line.data() + cut[1] + 1, nullptr);
        pnezd.point[2] = strtodd(line.data() + cut[2] + 1, nullptr);

        // d: everything after the fourth delimiter.
        pnezd._description = utf8_to_wstr(line.substr(cut[3] + 1));

        out.push_back(std::move(pnezd));
    }
}
static bool parse_pnezd(const std::filesystem::path& inpath, char dlm, PNEZDArray& penzdList)
{
try
{
FileHnd fh(CreateFileA(inpath.string().c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL));
if (fh.hnd() == INVALID_HANDLE_VALUE)
return false;
LARGE_INTEGER fileSize;
if (!GetFileSizeEx(fh.hnd(), &fileSize))
return false;
FileHnd fhm(CreateFileMapping(fh.hnd(), NULL, PAGE_READONLY, 0, 0, NULL));
if (fhm.hnd() != INVALID_HANDLE_VALUE)
{
size_t nbytes = static_cast<size_t>(fileSize.QuadPart);
FileMap mv(MapViewOfFile(fhm.hnd(), FILE_MAP_READ, 0, 0, nbytes), nbytes);
auto segments = makefileSegments(mv.view());
if (segments.size() == 1)
{
// Single-threaded parse
parse_pnezd_range(segments[0], dlm, penzdList);
}
else
{
// Multi-threaded parse
std::vector<PNEZDArray> per_thread_results(segments.size());
{
std::vector<std::jthread> jthreads;
jthreads.reserve(segments.size());
for (size_t i = 0; i < segments.size(); ++i)
{
const auto seg = segments[i];
jthreads.emplace_back([seg, &per_thread_results, i, dlm]()
{
parse_pnezd_range(seg, dlm, per_thread_results[i]);
});
}
}
// Merge results
size_t total = 0;
for (const auto& v : per_thread_results)
total += v.size();
penzdList.reserve(total);
for (auto& v : per_thread_results)
{
for (auto& item : v)
penzdList.push_back(std::move(item));
}
}
}
return true;
}
catch (...)
{
std::cout << "Exception parsing PNEZD file.\n";
}
return false;
}
/// Entry point: parses a comma-delimited PNEZD file and prints the elapsed
/// time and the number of records read.
///
/// Usage: program [path]   (path defaults to E:\PNEZD_2M.txt)
/// Returns 0 on success, 1 when the file could not be parsed.
int main(int argc, char* argv[])
{
    PerfTimer timer;
    const std::filesystem::path inpath = (argc > 1) ? argv[1] : "E:\\PNEZD_2M.txt";
    const char dlm = ',';
    PNEZDArray penzdList;
    if (!parse_pnezd(inpath, dlm, penzdList))
    {
        // Do not report "Done!" for a run that failed.
        std::cerr << "Failed to parse " << inpath.string() << "\n";
        return 1;
    }
    std::cout << timer.end() << " lines = " << penzdList.size() << "\n";
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment