Skip to content

Instantly share code, notes, and snippets.

@kxxt
Last active February 25, 2026 14:43
Show Gist options
  • Select an option

  • Save kxxt/fdf0a8cc9e899ac6e473e005f533db45 to your computer and use it in GitHub Desktop.

Select an option

Save kxxt/fdf0a8cc9e899ac6e473e005f533db45 to your computer and use it in GitHub Desktop.
(Nix) Protect your Forgejo instance against AI scrapers with iocaine

Protect your Forgejo instance against AI scrapers with iocaine

Recently, AI scrapers started endlessly crawling my Forgejo instance, causing persistent CPU usage above 80%. So I decided to feed them iocaine. I am sharing the config here to benefit the community.

In your flake.nix:

Add iocaine to inputs (The current version in nixpkgs is too old):

iocaine.url = "git+https://git.madhouse-project.org/iocaine/iocaine.git?ref=610b56f4b4";

And pass it via specialArgs so that we can use it in the nixos module:

nixpkgs.lib.nixosSystem {
  system = "x86_64-linux";
  # Hand the iocaine flake input to every NixOS module as a function argument.
  specialArgs = {
    iocaine = iocaine;
  };
  modules = [
    ./machines/whatever/configuration.nix
    sops-nix.nixosModules.sops
  ];
};

Other files:

  • ai.robots.txt-robots.json: Download from https://github.com/ai-robots-txt/ai.robots.txt/raw/refs/heads/main/robots.json
  • wordlist.txt: Prepare your own.
  • training-text.txt: Prepare your own. Personally I chose the GPL license text.
# The NixOS module file that configures your Forgejo instance:
# NixOS module arguments. `iocaine` is not a standard module argument: it is
# the flake input handed in through `specialArgs` in flake.nix.
{ lib, pkgs, config, iocaine, ... }:
let
# Shorthand for the Forgejo module's configuration.
cfg = config.services.forgejo;
# Shorthand for the `server` section of the Forgejo settings.
srv = cfg.settings.server;
# Loopback TCP port iocaine binds to; nginx proxies every request to it.
iocaine-port = 42069;
# Used as iocaine's working directory and as `state-directory` in its config.
# The unit sets StateDirectory = "iocaine", so keep the two in sync.
iocaine-state-dir = "/var/lib/iocaine";
in
{
# iocaine AI poison: a sandboxed daemon that nginx sends every request to first.
systemd.services.iocaine = {
  enable = true;
  description = "iocaine, the deadliest poison known to AI";
  after = [ "network.target" ];
  wantedBy = [ "multi-user.target" ];
  # The unit only references files under /etc/iocaine, whose paths never
  # change, so by default editing the config or the corpora would not restart
  # the daemon. Tie restarts to the content of those files instead.
  restartTriggers = map (name: config.environment.etc.${name}.source) [
    "iocaine/config.kdl"
    "iocaine/wordlist.txt"
    "iocaine/training-text.txt"
    "iocaine/ai.robots.txt-robots.json"
  ];
  serviceConfig = {
    Type = "notify";
    # `pkgs.system` is a deprecated alias that warns on recent nixpkgs;
    # `stdenv.hostPlatform.system` works on old and new releases alike.
    ExecStart = "${iocaine.packages.${pkgs.stdenv.hostPlatform.system}.iocaine}/bin/iocaine --config-path /etc/iocaine/config.kdl start";
    Restart = "on-failure";
    LimitNOFILE = 524288;

    # Run as a throw-away user; everything it creates is private to it.
    DynamicUser = true;
    UMask = "0077";

    # Writable locations. StateDirectory resolves to /var/lib/iocaine, which
    # is also the working directory (iocaine-state-dir).
    StateDirectory = "iocaine";
    WorkingDirectory = iocaine-state-dir;
    RuntimeDirectory = "iocaine";

    # Filesystem and kernel isolation.
    ProtectSystem = "strict";
    ProtectHome = true;
    ProtectClock = true;
    ProtectHostname = true;
    ProtectProc = "invisible";
    ProtectControlGroups = true;
    ProtectKernelModules = true;
    ProtectKernelTunables = true;
    ProtectKernelLogs = true;
    PrivateTmp = true;
    PrivateDevices = true;
    PrivateUsers = true;
    DevicePolicy = "closed";

    # Process-level restrictions.
    NoNewPrivileges = true;
    CapabilityBoundingSet = "";
    LockPersonality = true;
    # NOTE(review): left disabled on purpose, presumably because the
    # handler scripting engine needs writable+executable memory. Confirm
    # before tightening.
    MemoryDenyWriteExecute = false;
    RestrictNamespaces = true;
    RestrictRealtime = true;
    RestrictAddressFamilies = [
      "AF_INET"
      "AF_INET6"
      "AF_UNIX"
    ];
    SystemCallArchitectures = "native";
    SystemCallFilter = [
      "@system-service"
      "~@privileged"
      "~@resources"
    ];
  };
};
environment.etc = {
  # Main iocaine configuration: one HTTP server on the loopback port, served
  # by the "default" handler, which reads the three data files below.
  "iocaine/config.kdl".text = ''
    initial-seed ""
    state-directory "${iocaine-state-dir}"
    http-server default {
    bind "127.0.0.1:${toString iocaine-port}"
    use handler-from=default
    }
    declare-handler default language=roto
    declare-handler default {
    ai-robots-txt-path "/etc/iocaine/ai.robots.txt-robots.json"
    sources {
    training-corpus "/etc/iocaine/training-text.txt"
    wordlists "/etc/iocaine/wordlist.txt"
    }
    }
  '';

  # Data files referenced by the configuration above.
  "iocaine/wordlist.txt".source = ./iocaine/wordlist.txt;
  "iocaine/training-text.txt".source = ./iocaine/GPL-3.0.txt;
  "iocaine/ai.robots.txt-robots.json".source = ./iocaine/ai.robots.txt-robots.json;
};
# Reverse proxy: every request goes to iocaine first. When iocaine answers
# with status 421, nginx intercepts it and re-dispatches the request to the
# real Forgejo backend through the @fallback location.
services.nginx = {
  virtualHosts.${srv.DOMAIN} = {
    forceSSL = true;
    enableACME = true;
    # Allow large pushes/uploads over HTTP.
    extraConfig = ''
      client_max_body_size 512M;
    '';
    locations."/".extraConfig = ''
      proxy_cache off;
      proxy_intercept_errors on;
      proxy_set_header Host $host;
      proxy_pass http://127.0.0.1:${toString iocaine-port};
      error_page 421 = @fallback;
    '';
    # Use the IPv4 loopback literal rather than "localhost": nginx may resolve
    # "localhost" to both ::1 and 127.0.0.1, and attempts against the address
    # Forgejo is not listening on end up as connection errors in the log.
    locations."@fallback".proxyPass = "http://127.0.0.1:${toString srv.HTTP_PORT}";
  };
};
# Let sshd accept the GIT_PROTOCOL environment variable sent by git clients.
services.openssh.settings = {
  AcceptEnv = "GIT_PROTOCOL";
};
# The NixOS Forgejo module can dump its state to a compressed tarball, but
# that format does not lend itself to incremental backups.
# Use btrfs snapshots shipped by btrbk instead.
services.btrbk.instances.forgejo = {
  onCalendar = "Mon *-*-* 02:00:00 Asia/Hong_Kong";
  settings = {
    # Transport.
    ssh_identity = "/var/lib/secrets/id_btrbk"; # NOTE: must be readable by user/group btrbk
    ssh_user = "root"; # We have ssh_filter_btrbk.sh at receiving end
    stream_compress = "zstd";

    # Local snapshot retention.
    snapshot_preserve_min = "2d";
    snapshot_preserve = "14d";

    volume."/" = {
      snapshot_dir = "btrbk_snapshots";
      # Forgejo data and its database are snapshotted together.
      subvolume."var/lib/forgejo" = { };
      subvolume."var/lib/postgresql" = { };
      # Every snapshot is sent to both backup servers.
      target = {
        "ssh://first-backup-server/backup/btrbk/forgejo" = { };
        "ssh://second-backup-server/backup/btrbk/forgejo" = { };
      };
    };
  };
};
# Stop Forgejo and PostgreSQL while the two subvolumes (data and database)
# are snapshotted, so that both snapshots describe one consistent state.
# Only the snapshot step happens while the services are down; the (slow)
# transfer to the backup targets runs afterwards via `btrbk resume`.
systemd.services."btrbk-forgejo".serviceConfig =
  let
    # Named to avoid shadowing the module argument `config`.
    btrbkConf = "/etc/btrbk/forgejo.conf";
    systemctl = "${pkgs.systemd}/bin/systemctl";
    # The "+" prefix runs the command with full privileges, regardless of the
    # user the backup unit itself runs as.
    stopServices = [
      "+${systemctl} stop forgejo"
      "+${systemctl} stop postgresql"
    ];
    startServices = [
      "+${systemctl} start postgresql"
      "+${systemctl} start forgejo"
    ];
  in
  {
    ReadOnlyPaths = [ "/var/lib/secrets/id_btrbk" ];
    ExecStart = lib.mkForce [
      "${pkgs.btrbk}/bin/btrbk -c ${btrbkConf} resume"
    ];
    ExecStartPre = lib.mkBefore (
      stopServices
      # Perform the snapshot step
      ++ [ "${pkgs.btrbk}/bin/btrbk -c ${btrbkConf} snapshot -p" ]
      # Bring the services back as soon as the snapshot is done
      ++ startServices
    );
    # If the snapshot command fails, systemd skips the remaining ExecStartPre
    # commands, which would leave Forgejo and PostgreSQL stopped. ExecStopPost
    # also runs when start-up fails, so the services always come back.
    # Starting an already running unit is a no-op.
    ExecStopPost = startServices;
  };
# The Forgejo instance itself, backed by PostgreSQL.
services.forgejo = {
  enable = true;
  database.type = "postgres";
  # Enable support for Git Large File Storage
  lfs.enable = true;
  settings = {
    server = {
      DOMAIN = "your.git.instance.example.com";
      # You need to specify this to remove the port from URLs in the web UI.
      ROOT_URL = "https://${srv.DOMAIN}/";
      HTTP_PORT = 3000;
      # Advertise the port sshd actually listens on in SSH clone URLs.
      SSH_PORT = lib.head config.services.openssh.ports;
    };
    # You can temporarily allow registration to create an admin user.
    service.DISABLE_REGISTRATION = true;
    # Forgejo Actions (based on act: https://github.com/nektos/act) are
    # switched off here; set ENABLED = true to use them.
    actions = {
      ENABLED = false;
      DEFAULT_ACTIONS_URL = "github";
    };
    # Sending emails is completely optional
    # You can send a test email from the web UI at:
    # Profile Picture > Site Administration > Configuration > Mailer Configuration
    mailer = {
      ENABLED = true;
      SMTP_ADDR = "mail.example.dev";
      FROM = "git@example.dev";
      USER = "git@example.dev";
      SMTP_PORT = 465; # Secure SMTP
    };
    attachment = {
      MAX_SIZE = 10; # Max attachment size is 10MB.
    };
    # /metrics for Prometheus
    metrics = {
      ENABLED = true;
    };
    # Minimize attack surface
    api = {
      ENABLE_SWAGGER = false;
    };
    oauth2 = {
      ENABLED = false;
    };
    other = {
      SHOW_FOOTER_VERSION = false;
    };
  };
  # Each value here is the PATH of a file containing the secret, not the
  # secret itself: the module loads the file through systemd's LoadCredential.
  # Never put the secret inline, since it would end up world-readable in the
  # Nix store. The paths below are placeholders; point them at files
  # provisioned by your secrets manager (e.g. sops-nix), and write them as
  # strings so they are not copied into the store.
  secrets = {
    mailer.PASSWD = "/run/secrets/forgejo/mailer-password";
    metrics.TOKEN = "/run/secrets/forgejo/metrics-token";
  };
};
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment