Skip to content

Instantly share code, notes, and snippets.

@MEhrn00
Created January 12, 2025 03:59
Show Gist options
  • Select an option

  • Save MEhrn00/972781642a7c5244a95bafd20af66a01 to your computer and use it in GitHub Desktop.

Select an option

Save MEhrn00/972781642a7c5244a95bafd20af66a01 to your computer and use it in GitHub Desktop.
Generating COFFs for embedding files in C/C++ programs

The commonly seen method of embedding arbitrary file data in a C/C++ program is by converting the file data into a C byte array and placing it in a header file.

#ifndef MYDATA_H
#define MYDATA_H

const char FILEDATA[] = { 0x41, 0x41, 0x41, 0x41, ... };

#endif // MYDATA_H

This can cause issues: code analysis tools, such as auto-completion engines, may choke when trying to process this file.

An alternative way of embedding files in a C/C++ program is by converting the file into a linkable object file and linking it directly into the program.

GNU/MinGW ld can do this using the -r and -b flags.

x86_64-w64-mingw32-gcc -r -b binary -o hello.o hello.txt

The object file contains a couple of exported symbols which can be used to reference the input file data.

#include <stdio.h>

/* Symbols exported by the object file generated from hello.txt. */
extern const char _binary_hello_txt_start[];
extern const char _binary_hello_txt_end;

/* Print the address range and the size of the embedded file data. */
int main(void) {
    /* %p expects a void pointer, so convert explicitly. */
    printf("Array: %p\n", (const void *)_binary_hello_txt_start);
    printf("Array end: %p\n", (const void *)&_binary_hello_txt_end);
    /* Pointer subtraction yields a ptrdiff_t; cast it to match %llu. */
    printf("Size: %llu\n",
           (unsigned long long)(&_binary_hello_txt_end - _binary_hello_txt_start));
    return 0;
}
x86_64-w64-mingw32-gcc -o test.exe main.c hello.o
.\test.exe
Array: 00007ff62af79000
Array end: 00007ff62af7900d
Size: 13

https://stackoverflow.com/a/4158997

LLVM ld can also do this; however, it only supports generating ELFs and the Windows version strips out the -r and -b flags.

AFAIK, there are no native tools on Windows that can do this.

The way this works is that ld will create a minimal object file with a single .data/.rdata section that contains the embedded file data.

COFFs are a fairly straightforward file format and, since there is no need for relocations, it is pretty easy to write a tool that mimics what ld does (a self-contained Python script for this is below).

This can be integrated into a build system like make, cmake, Visual Studio, etc. to automatically generate it at build time.

#!/usr/bin/env python3
"""bin2coff.py
usage: bin2coff.py [-h] [-s SYMBOL] [-m {amd64,i386,arm,arm64}] input [output]
Converts an arbitrary file into a linkable COFF.
positional arguments:
input Input file for generating the COFF
output Output for the generated COFF (defaults to the input file name with a '.o' extension)
options:
-h, --help show this help message and exit
-s, --symbol SYMBOL Name of the output symbol (defaults to the name of the input file with '.' replaced with '_')
-m, --machine {amd64,i386,arm,arm64}
Machine value for the COFF
"""
from __future__ import annotations
import argparse
import enum
import io
import mmap
import os
import pathlib
import struct
from dataclasses import dataclass, astuple, field
__author__ = "Matt Ehrnschwender"
__license__ = "MIT"
class DataclassStruct(struct.Struct):
    """A ``struct.Struct`` that packs the dataclass fields of its own instance.

    Subclasses are expected to be dataclasses: ``pack`` and ``pack_into``
    feed ``astuple(self)`` (the field values in declaration order) to the
    underlying struct format.
    """

    def __init__(self, form: str):
        """Initialize the underlying ``struct.Struct`` with format *form*."""
        struct.Struct.__init__(self, form)

    def pack(self) -> bytes:
        """Return this instance's field values packed with the struct format."""
        values = astuple(self)
        return struct.Struct.pack(self, *values)

    def pack_into(self, buffer, offset):
        """Pack this instance's field values into *buffer* at *offset*."""
        values = astuple(self)
        return struct.Struct.pack_into(self, buffer, offset, *values)
class CoffCharacteristics(enum.IntFlag):
    """Bit flags stored in the ``characteristics`` field of the COFF file header."""

    RelocsStripped = 1 << 0
    LineNumsStripped = 1 << 2
@dataclass
class CoffFileHeader(DataclassStruct):
    """COFF file header record.

    The fields are packed little-endian, in declaration order, with the
    format ``<2H3I2H`` (20 bytes). Only ``machine`` is required; every other
    field defaults to zero except ``characteristics``.
    """

    machine: CoffMachine                   # H
    number_of_sections: int = 0            # H
    timedate_stamp: int = 0                # I
    pointer_to_symbol_table: int = 0       # I
    number_of_symbols: int = 0             # I
    size_of_optional_header: int = 0       # H
    characteristics: CoffCharacteristics = CoffCharacteristics.LineNumsStripped  # H

    def __post_init__(self):
        # The dataclass-generated __init__ only assigns the fields; the
        # struct format has to be installed afterwards.
        DataclassStruct.__init__(self, "<2H3I2H")
class CoffMachine(enum.IntEnum):
    """Machine values that can be written to the COFF file header.

    ``str()`` of a member is its lower-cased name (for example ``"amd64"``),
    which is also the spelling accepted by :meth:`from_str`.
    """

    Amd64 = 0x8664
    I386 = 0x14c
    Arm = 0x1c0
    Arm64 = 0xaa64

    def __str__(self):
        """Return the lower-cased member name."""
        return self.name.lower()

    @staticmethod
    def from_str(s: str) -> CoffMachine:
        """Return the member whose ``str()`` equals *s* (case-sensitive).

        The lookup is derived from the enum members themselves, so adding a
        new machine does not require touching this method.

        Raises:
            ValueError: if *s* does not name a known machine. The message
                lists the accepted spellings.
        """
        for member in CoffMachine:
            if s == str(member):
                return member
        valid = ", ".join(str(member) for member in CoffMachine)
        raise ValueError(f"unknown machine {s!r} (expected one of: {valid})")
class CoffSectionCharacteristics(enum.IntFlag):
    """Bit flags stored in the ``characteristics`` field of a COFF section header."""

    Reserved0 = 0x00000000
    CntInitializedData = 0x00000040
    Align16Bytes = 0x00500000
    MemRead = 0x40000000
    MemWrite = 0x80000000
@dataclass
class CoffSectionHeader(DataclassStruct):
    """COFF section header record.

    The fields are packed little-endian, in declaration order, with the
    format ``<8s6I2HI`` (40 bytes). Every field has a zero/empty default.
    """

    name: bytes = b"\0" * 8                # 8s
    virtual_size: int = 0                  # I
    virtual_address: int = 0               # I
    size_of_raw_data: int = 0              # I
    pointer_to_raw_data: int = 0           # I
    pointer_to_relocations: int = 0        # I
    pointer_to_line_numbers: int = 0       # I
    number_of_relocations: int = 0         # H
    number_of_line_numbers: int = 0        # H
    characteristics: CoffSectionCharacteristics = CoffSectionCharacteristics.Reserved0  # I

    def __post_init__(self):
        # The dataclass-generated __init__ only assigns the fields; the
        # struct format has to be installed afterwards.
        DataclassStruct.__init__(self, "<8s6I2HI")
class CoffSymbolComplexType(enum.IntEnum):
    """Complex-type component of a COFF symbol type; only ``Null`` is defined here."""

    Null = 0x0
class CoffSymbolBaseType(enum.IntEnum):
    """Base-type component of a COFF symbol type; only ``Null`` is defined here."""

    Null = 0x0
@dataclass
class CoffSymbolType(DataclassStruct):
    """Pair of complex and base type that collapses to one integer via ``int()``.

    NOTE(review): unlike the other records, this class never installs a
    struct format, so only the ``int()`` conversion is usable.
    """

    complex: CoffSymbolComplexType = CoffSymbolComplexType.Null
    base: CoffSymbolBaseType = CoffSymbolBaseType.Null

    def __int__(self):
        """Return the complex type shifted left by 8 bits, OR-ed with the base type."""
        high = self.complex << 8
        return high | self.base
class CoffSectionNumberValue(enum.IntEnum):
    """Named values for the ``section_number`` field of a COFF symbol."""

    Debug = -2
    Absolute = -1
    Undefined = 0
class CoffSymbolStorageClass(enum.IntEnum):
    """Values for the ``storage_class`` field of a COFF symbol."""

    Null = 0x0
    External = 0x2
@dataclass
class CoffSymbol(DataclassStruct):
    """COFF symbol table record.

    The fields are packed little-endian, in declaration order, with the
    format ``<8sIhH2B`` (18 bytes).
    """

    name: bytes = b"\0" * 8                # 8s
    value: int = 0                         # I
    section_number: CoffSectionNumberValue = CoffSectionNumberValue.Undefined  # h
    # Built lazily per instance from a default CoffSymbolType.
    symbol_type: int = field(default_factory=lambda: int(CoffSymbolType()))    # H
    storage_class: CoffSymbolStorageClass = CoffSymbolStorageClass.Null        # B
    number_of_aux_symbols: int = 0         # B

    def __post_init__(self):
        # The dataclass-generated __init__ only assigns the fields; the
        # struct format has to be installed afterwards.
        DataclassStruct.__init__(self, "<8sIhH2B")
class StringTable:
    """String table: a 4-byte little-endian total size followed by NUL-terminated strings."""

    _data: bytearray

    def __init__(self):
        # Per-instance buffer. A class-level bytearray would be shared by
        # every StringTable, leaking strings from one table into another.
        self._data = bytearray()

    def add_string(self, s: str) -> int:
        """Append *s* (encoded, NUL-terminated) and return its table offset.

        The returned offset counts from the start of the packed table, i.e.
        it includes the 4-byte size prefix.
        """
        offset = self.size
        self._data += s.encode() + b"\0"
        return offset

    def pack(self) -> bytes:
        """Return the size prefix followed by the accumulated string data."""
        return struct.pack("<I", self.size) + bytes(self._data)

    @property
    def size(self) -> int:
        """Total packed size in bytes, including the 4-byte size prefix."""
        return 4 + len(self._data)
class CoffSymbolTable:
    """Ordered collection of ``CoffSymbol`` records."""

    _tbl: list[CoffSymbol]

    def __init__(self):
        self._tbl = []

    def add_symbol(self, string_table: StringTable, **kwargs) -> StringTable:
        """Create a ``CoffSymbol`` from *kwargs* and append it to the table.

        A ``name`` keyword, when given and non-empty, is a ``str``. Names
        that fit in 8 encoded bytes are stored inline, NUL-padded. Longer
        names are added to *string_table* and the name field becomes four
        zero bytes followed by the string-table offset.

        Returns:
            The same *string_table* object that was passed in.
        """
        if symbol_name := kwargs.get("name"):
            # Measure the encoded form: the 8-byte limit applies to bytes,
            # and a non-ASCII name can encode to more bytes than characters.
            encoded = symbol_name.encode()
            if len(encoded) > 8:
                offset = string_table.add_string(symbol_name)
                encoded = struct.pack("<II", 0, offset)
            else:
                encoded = encoded.ljust(8, b"\0")
            kwargs["name"] = encoded

        self._tbl.append(CoffSymbol(**kwargs))
        return string_table

    @property
    def size(self) -> int:
        """Packed size of all symbols in bytes."""
        # ``size`` must be read from instances: on the class itself it is a
        # descriptor object, not an integer.
        return sum(symbol.size for symbol in self._tbl)

    def pack(self) -> bytes:
        """Return every symbol packed back to back, in insertion order."""
        return b"".join(symbol.pack() for symbol in self._tbl)
class CoffBuilder:
    """Builds a single-section COFF object embedding the contents of a file.

    The output consists of a file header, one ``.rdata`` section header, the
    section data, the symbol table and the string table. The section data is
    the input file, zero padding (at least one byte, up to a 16-byte
    boundary), then the input size as an 8-byte little-endian integer.

    Three external symbols are defined in section 1:
    ``<symbol>_start`` (offset 0), ``<symbol>_end`` (offset of the first byte
    after the file data) and ``<symbol>_size`` (offset of the stored size).
    """

    # Inputs larger than this many bytes are memory-mapped instead of read.
    _MMAP_THRESHOLD = 8 * 2**20

    _inputfp: io.FileIO
    _inputsize: int
    symbol: str
    machine: CoffMachine

    def __init__(self, inputfp: io.FileIO, symbol: str, machine: CoffMachine):
        """Record the input file, its size, the symbol prefix and the machine.

        *inputfp* must be seekable; it is left positioned at offset 0.
        """
        self._inputfp = inputfp
        inputfp.seek(0, os.SEEK_END)
        self._inputsize = inputfp.tell()
        inputfp.seek(0)
        self.symbol = symbol
        self.machine = machine

    def write_output(self, outfp: io.BytesIO):
        """Write the complete COFF object to *outfp*."""
        # File data plus at least one NUL byte, rounded up to 16 bytes.
        padded_len = (self._inputsize + 1 + 0xF) & ~0xF
        # The 8-byte size value is stored right after the padded data.
        section_len = padded_len + 8

        string_table = StringTable()
        symbol_table = CoffSymbolTable()
        symbols = (
            ("start", 0),
            ("end", self._inputsize),
            ("size", padded_len),
        )
        for suffix, value in symbols:
            string_table = symbol_table.add_symbol(
                string_table,
                name=f"{self.symbol}_{suffix}",
                value=value,
                section_number=1,
                storage_class=CoffSymbolStorageClass.External,
            )

        header_size = CoffFileHeader(self.machine).size + CoffSectionHeader().size
        pointer_to_raw_data = header_size
        pointer_to_symbol_table = pointer_to_raw_data + section_len

        rdata_section_header = CoffSectionHeader(
            name=b".rdata",
            size_of_raw_data=section_len,
            pointer_to_raw_data=pointer_to_raw_data,
            characteristics=(
                CoffSectionCharacteristics.Align16Bytes
                | CoffSectionCharacteristics.MemRead
                | CoffSectionCharacteristics.CntInitializedData
            ),
        )
        file_header = CoffFileHeader(
            machine=self.machine,
            number_of_sections=1,
            pointer_to_symbol_table=pointer_to_symbol_table,
            # Derived from the symbols actually added above.
            number_of_symbols=len(symbols),
            characteristics=(
                CoffCharacteristics.LineNumsStripped
                | CoffCharacteristics.RelocsStripped
            ),
        )

        outfp.write(file_header.pack())
        outfp.write(rdata_section_header.pack())

        # Rewind so the whole file is emitted even on a repeated call.
        self._inputfp.seek(0)
        if self._inputsize > self._MMAP_THRESHOLD:
            # The input is opened read-only, so the mapping must be read-only
            # too; the default access mode asks for a writable mapping.
            with mmap.mmap(
                self._inputfp.fileno(), 0, access=mmap.ACCESS_READ
            ) as mm:
                outfp.write(mm)
        else:
            outfp.write(self._inputfp.read())

        outfp.write(bytearray(padded_len - self._inputsize))
        outfp.write(struct.pack("<Q", self._inputsize))
        outfp.write(symbol_table.pack())
        outfp.write(string_table.pack())
def parse_arguments(argv=None):
    """Parse the command line of bin2coff.py.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) keeps the original
            behavior of parsing ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with ``input``, ``symbol``, ``machine``
        and ``output`` attributes. ``symbol`` and ``output`` are ``None``
        when not given.
    """
    parser = argparse.ArgumentParser(
        prog="bin2coff.py",
        description="Converts an arbitrary file into a linkable COFF.",
    )

    parser.add_argument(
        "input",
        help="Input file for generating the COFF",
        type=pathlib.Path,
    )
    parser.add_argument(
        "-s",
        "--symbol",
        help="Name of the output symbol (defaults to the name of the input file with '.' replaced with '_')",
        type=str,
        default=None,
    )
    parser.add_argument(
        "-m",
        "--machine",
        help="Machine value for the COFF",
        type=CoffMachine.from_str,
        choices=list(CoffMachine),
        default=CoffMachine.Amd64,
    )
    parser.add_argument(
        "output",
        help="Output for the generated COFF (defaults to the input file name with a '.o' extension)",
        type=pathlib.Path,
        nargs="?",
        default=None,
    )

    return parser.parse_args(argv)
if __name__ == "__main__":
    args = parse_arguments()

    # Default symbol prefix: the input file name with dots turned into
    # underscores.
    if args.symbol is None:
        args.symbol = args.input.name.replace(".", "_")

    # Default output: next to the input, named after its stem with ".o".
    output = args.output or args.input.parent.joinpath(args.input.stem).with_suffix(".o")

    with args.input.open("rb") as infile:
        builder = CoffBuilder(infile, args.symbol, args.machine)
        with output.open("wb") as outfile:
            builder.write_output(outfile)
cmake_minimum_required(VERSION 3.18)
project(example LANGUAGES C)

find_package(Python REQUIRED COMPONENTS Interpreter)

# Convert hello.txt into a linkable object file at build time.
add_custom_command(
  OUTPUT hello.o
  COMMAND
    ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/scripts/bin2coff.py
    ${CMAKE_CURRENT_SOURCE_DIR}/hello.txt
    hello.o
  DEPENDS
    ${CMAKE_CURRENT_SOURCE_DIR}/hello.txt
    # Regenerate the object when the generator script itself changes.
    ${CMAKE_CURRENT_SOURCE_DIR}/scripts/bin2coff.py
  COMMENT "Generating hello.o from hello.txt"
  VERBATIM
)
add_custom_target(hello_gen DEPENDS hello.o)

# Expose the generated object file as an imported object library.
add_library(hello OBJECT IMPORTED)
set_target_properties(hello PROPERTIES
  IMPORTED_OBJECTS
    "${CMAKE_CURRENT_BINARY_DIR}/hello.o"
)

add_executable(main main.c)
target_link_libraries(main hello)
# An imported library is never built, so nothing would otherwise trigger the
# custom command; make the executable wait for hello.o to be generated.
add_dependencies(main hello_gen)
Hello World
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Symbols defined by the object file generated from hello.txt. */
extern const char hello_txt_start[];
extern const char hello_txt_end;
/* Declared const: the value is stored in a read-only section. */
extern const size_t hello_txt_size;

/* Print the address range and the stored size of the embedded file data. */
int main(void) {
    /* %p expects a void pointer, so convert explicitly. */
    printf("Array: %p\n", (const void *)hello_txt_start);
    printf("Array end: %p\n", (const void *)&hello_txt_end);
    /* size_t is not always unsigned long long; cast it to match %llu. */
    printf("Size: %llu\n", (unsigned long long)hello_txt_size);
    return 0;
}
CC = x86_64-w64-mingw32-gcc
PYTHON = python3
BIN2COFF = scripts/bin2coff.py

.PHONY : all clean

all : main.exe

# -f keeps `make clean` from failing when the files do not exist yet.
clean:
	rm -f main.exe main.o hello.o

main.exe : main.o hello.o
	$(CC) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@

# Generate the linkable object from the text file.
hello.o : hello.txt
	$(PYTHON) $(BIN2COFF) -m amd64 $< $@
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment