Skip to content

Instantly share code, notes, and snippets.

@evadne
Created November 16, 2025 19:44
Show Gist options
  • Select an option

  • Save evadne/c1581509cbeec2588bb387340b40ce9e to your computer and use it in GitHub Desktop.

Select an option

Save evadne/c1581509cbeec2588bb387340b40ce9e to your computer and use it in GitHub Desktop.
Mix.install([
{:ortex, "~> 0.1.10"},
{:nx, "~> 0.10.0"}
])
# Directory holding the model bundle: the ONNX graph and its JSON configuration.
directory_path = "/Users/evadne/Projects/magika/assets/models/standard_v3_3"
config_path = Path.join([directory_path, "config.min.json"])
model_path = Path.join([directory_path, "model.onnx"])

# Read and decode the configuration once; every setting below comes from it.
config = JSON.decode!(File.read!(config_path))

# Minimum file size and the number of bytes sampled from each end of the file.
min_size = Map.get(config, "min_file_size_for_dl")
head_size = Map.get(config, "beg_size")
tail_size = Map.get(config, "end_size")

# Filler value used when the file is shorter than the sampled windows.
padding = Map.get(config, "padding_token")

# Output labels plus the general and per-label score thresholds.
labels = Map.get(config, "target_labels_space")
threshold = Map.get(config, "medium_confidence_threshold")
thresholds = Map.get(config, "thresholds")
# File to classify. Swap in one of the commented alternatives to try other inputs.
# file_path = "/Users/evadne/Downloads/Padlet.pdf"
file_path = "/Users/evadne/Downloads/Zed-aarch64.dmg"
# file_path = "/Users/evadne/Projects/gen_magika/test.bin"
# file_path = "/Users/evadne/Downloads/0x0804.ini"

# File.stat!/1 raises a descriptive File.Error (with the path) instead of a bare MatchError.
file_stat = File.stat!(file_path)

if file_stat.size < min_size do
  IO.puts "file is too small, we would have to guess type"
else
  # Read up to head_size bytes from the start and up to tail_size bytes from the end.
  # For files shorter than a window the two reads overlap, which is intended.
  file_head_size = min(file_stat.size, head_size)
  file_tail_size = min(file_stat.size, tail_size)
  file_tail_offset = file_stat.size - file_tail_size
  file_locnums = [{0, file_head_size}, {file_tail_offset, file_tail_size}]

  # File.open!/3 with a function guarantees the descriptor is closed, even if the read raises.
  {:ok, chunks} =
    File.open!(file_path, [:binary, :read, :raw], fn file ->
      :file.pread(file, file_locnums)
    end)

  # :file.pread/2 yields :eof for a position at or past the end of file; treat that as no data.
  [file_head, file_tail] =
    Enum.map(chunks, fn
      :eof -> <<>>
      data -> data
    end)

  # Layout: head bytes, then padding, then tail bytes, for a total of head_size + tail_size tokens.
  # Padding is computed from the bytes actually read so a short read cannot produce a wrong width.
  # Tokens are built as integers rather than as a binary: `<<padding>>` would truncate a padding
  # token outside 0..255 (e.g. 256 becomes 0) before it ever reached the model.
  # NOTE(review): the reference Magika implementation appears to strip leading/trailing
  # whitespace before sampling the head/tail windows — confirm whether that is needed here.
  input_size = head_size + tail_size
  pad_count = input_size - byte_size(file_head) - byte_size(file_tail)

  tokens =
    :binary.bin_to_list(file_head) ++
      List.duplicate(padding, pad_count) ++
      :binary.bin_to_list(file_tail)

  # A nested list gives shape {1, input_size}: a batch of one sample.
  input = Nx.tensor([tokens], type: :s32)

  model = Ortex.load(model_path)
  {result} = Ortex.run(model, input)

  # The model must emit exactly one score per configured label.
  labels_count = length(labels)
  {1, ^labels_count} = Nx.shape(result)

  # Pair each score with its label index and rank from highest to lowest score.
  ranked =
    result
    |> Nx.reshape({labels_count})
    |> Nx.to_list()
    |> Enum.with_index()
    |> Enum.sort_by(&elem(&1, 0), :desc)

  [{score, index_first}, {score_second, _index_second} | _] = ranked

  # Confidence is the margin between the best and the second-best score.
  confidence = score - score_second
  label = Enum.at(labels, index_first)

  cond do
    score < threshold -> IO.puts "bad because score is below general threshold"
    score < Map.get(thresholds, label, 0) -> IO.puts "bad because score is below file type specific threshold"
    true -> IO.inspect [score, confidence, label]
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment