Last active
January 20, 2026 17:11
-
-
Save bouroo/bc52ad58a6e75d44e5235b229e9ca988 to your computer and use it in GitHub Desktop.
Kernel tuning for dedicated linux server. /etc/sysctl.d/60-sysctl.conf
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ################################################################################ | |
| # /etc/sysctl.d/60-sysctl.conf | |
| # Performance-Optimized Kernel Tuning for Web + DB Servers | |
| # Apply with: sysctl --system | |
| # Impact: Focused on >5% performance gains for high-concurrency workloads | |
| ################################################################################ | |
| ################################################################################ | |
| # MEMORY MANAGEMENT | |
| ################################################################################ | |
| # Swappiness: Controls kernel's tendency to swap (0-100). | |
| # Impact: 10-30%+ performance gain for database workloads by preventing unnecessary swapping. | |
| # Value 10 favors keeping pages in memory, critical for DB cache effectiveness. | |
| vm.swappiness = 10 | |
| # Dirty page management - controls write behavior for I/O performance. | |
| # Impact: 5-15% improvement in write-heavy workloads by reducing write burst size. | |
| # These values ensure more frequent, smaller writes instead of large I/O spikes. | |
| # Percentage of total memory where processes start synchronous writes (background threshold) | |
| vm.dirty_background_ratio = 5 | |
| # Maximum percentage of memory holding dirty pages before forced synchronous writes | |
| vm.dirty_ratio = 15 | |
| # Time in centiseconds dirty data can stay in memory before being written (15 seconds) | |
| # Default is 30 seconds. Reducing this prevents large writeback spikes. | |
| vm.dirty_expire_centisecs = 1500 | |
| # Time in centiseconds between background writeback cycles (2.5 seconds) | |
| # More frequent cycles = smoother I/O, better for concurrent DB/web workloads | |
| vm.dirty_writeback_centisecs = 250 | |
| # Memory overcommit behavior for database workloads. | |
| # Impact: Enables databases (PostgreSQL, MySQL) to allocate large buffers efficiently. | |
| # Value 1 allows overcommit, essential for databases that reserve memory upfront. | |
| # Monitor OOM killer; adjust to 2 if memory is constrained. | |
| vm.overcommit_memory = 1 | |
| # Percentage of RAM that can be overcommitted (200% = 2x physical RAM). | |
| # NOTE: overcommit_ratio is only consulted when vm.overcommit_memory = 2; | |
| # it is ignored in mode 1 (set above). Kept for easy switching to mode 2. | |
| vm.overcommit_ratio = 200 | |
| # VFS cache pressure - controls reclaiming of directory/inode cache. | |
| # Impact: 5-10% filesystem performance improvement for metadata-heavy workloads. | |
| # Lower than default (100) preserves more dentry/inode cache, beneficial for web servers. | |
| vm.vfs_cache_pressure = 75 | |
| # Minimum free RAM threshold (128MB) to prevent system freeze under memory pressure. | |
| # Critical stability parameter. Increase to 262144 (256MB) for systems with 64GB+ RAM. | |
| vm.min_free_kbytes = 131072 | |
| ################################################################################ | |
| # NETWORK STACK | |
| ################################################################################ | |
| # TCP Congestion Control: BBR (Bottleneck Bandwidth and Round-trip propagation time) | |
| # Impact: 5-40% throughput improvement and reduced latency for modern networks. | |
| # Significantly outperforms Cubic/Reno on high-BDP and lossy networks. | |
| net.ipv4.tcp_congestion_control = bbr | |
| # Default queuing discipline: fq_codel (Fair Queuing Controlled Delay) | |
| # Impact: 10-30% latency reduction by eliminating bufferbloat. | |
| # Provides fair bandwidth distribution and low latency under load. | |
| net.core.default_qdisc = fq_codel | |
| # TCP buffer sizes (min, default, max in bytes). | |
| # Impact: 15-50% throughput improvement on high-bandwidth, high-latency networks. | |
| # Critical for maximizing throughput on 1Gbps+ connections and cross-region deployments. | |
| # Max 32MB receive buffer | |
| net.ipv4.tcp_rmem = 4096 87380 33554432 | |
| # Max 32MB send buffer | |
| net.ipv4.tcp_wmem = 4096 65536 33554432 | |
| # Core socket buffer limits. | |
| # Impact: Enables large TCP buffers above. Required for high-throughput connections. | |
| # 32MB max receive buffer | |
| net.core.rmem_max = 33554432 | |
| # 32MB max send buffer | |
| net.core.wmem_max = 33554432 | |
| # Disable TCP slow start after idle periods. | |
| # Impact: 10-20% performance improvement for spiky web traffic. | |
| # Prevents throughput collapse when connections resume after idle periods. | |
| net.ipv4.tcp_slow_start_after_idle = 0 | |
| # TCP Fast Open: Allow data in SYN packet (mode 3 = enabled for server and client). | |
| # Impact: 10-30ms latency reduction for repeated connections (HTTP keepalive, DB pools). | |
| # Particularly beneficial for API servers and high-frequency DB queries. | |
| net.ipv4.tcp_fastopen = 3 | |
| # Reuse TIME-WAIT sockets for new connections. | |
| # Impact: Reduces connection setup overhead for high-concurrency web servers. | |
| # Critical for systems handling >10k concurrent connections. | |
| net.ipv4.tcp_tw_reuse = 1 | |
| # FIN-WAIT-2 timeout in seconds. | |
| # Impact: Faster resource cleanup, reduces memory pressure under load. | |
| # Lower than default (60s) for quicker connection recycling. | |
| net.ipv4.tcp_fin_timeout = 15 | |
| # Maximum connection backlog for listening sockets. | |
| # Impact: Prevents connection drops during traffic spikes. | |
| # Essential for web servers handling bursty traffic (e.g., 8192 vs default 128). | |
| net.core.somaxconn = 8192 | |
| # SYN backlog for half-open connections. | |
| # Impact: Prevents SYN flood impact on legitimate connections during high load. | |
| # Paired with tcp_syncookies for DoS resilience. | |
| net.ipv4.tcp_max_syn_backlog = 8192 | |
| # TCP window scaling for high-bandwidth, high-latency networks. | |
| # Impact: Enables TCP window sizes larger than 64KB (essential for 1Gbps+). | |
| net.ipv4.tcp_window_scaling = 1 | |
| # Selective ACK - receiver can inform sender about missing segments. | |
| # Impact: 5-15% throughput improvement on lossy networks (wireless, long-distance). | |
| net.ipv4.tcp_sack = 1 | |
| # TCP timestamps (required for tcp_tw_reuse, RTT estimation). | |
| # Impact: Enables accurate RTT calculation and PAWS (Protection Against Wrapped Sequences). | |
| net.ipv4.tcp_timestamps = 1 | |
| # Network device backlog - packets queued when kernel can't process fast enough. | |
| # Impact: Prevents packet loss on 10Gbps+ NICs under heavy load. | |
| net.core.netdev_max_backlog = 10000 | |
| # NAPI poll budget - packets processed per interrupt. | |
| # Impact: Improves throughput by reducing interrupt overhead. | |
| # Value 600 balances throughput and latency for modern NICs. | |
| net.core.netdev_budget = 600 | |
| # TCP retransmissions before giving up (default 15). | |
| # Impact: Faster failure detection (~100s total timeout with RTO backoff vs | |
| # ~925s at the default of 15), better for connection pooling. | |
| net.ipv4.tcp_retries2 = 8 | |
| # Maximum TIME-WAIT sockets allowed. | |
| # Impact: Higher limit prevents connection failures under high churn. | |
| net.ipv4.tcp_max_tw_buckets = 262144 | |
| # Maximum orphaned sockets (not attached to file descriptors). | |
| # Impact: Prevents resource exhaustion during connection storms. | |
| net.ipv4.tcp_max_orphans = 65536 | |
| # Do not save TCP metrics from closed connections. | |
| # Impact: Avoids stale routing decisions on dynamic network environments. | |
| net.ipv4.tcp_no_metrics_save = 1 | |
| # Path MTU discovery probing (enabled after ICMP black hole detection). | |
| # Impact: Ensures optimal packet size, prevents fragmentation. | |
| net.ipv4.tcp_mtu_probing = 1 | |
| # Connection tracking table size. | |
| # Impact: Supports up to 1M concurrent connections (~300MB RAM). | |
| # Critical for high-traffic web servers and load balancers. | |
| net.netfilter.nf_conntrack_max = 1048576 | |
| # Established connection timeout (2 hours). | |
| # Impact: Balance between memory usage and long-lived connection support. | |
| net.netfilter.nf_conntrack_tcp_timeout_established = 7200 | |
| ################################################################################ | |
| # FILE SYSTEM & I/O | |
| ################################################################################ | |
| # System-wide file descriptor limit. | |
| # Impact: Critical for web servers (Nginx/Apache) and databases (many open files). | |
| # Prevents "too many open files" errors under high concurrency. | |
| fs.file-max = 4194304 | |
| # Shared memory segment size (16GB) for databases. | |
| # Impact: Enables PostgreSQL/MySQL to use large shared buffers. | |
| # Essential for OLTP workloads requiring substantial in-memory caching. | |
| kernel.shmmax = 17179869184 | |
| # Total shared memory pages (4K pages * 4M = ~16GB). | |
| kernel.shmall = 4194304 | |
| # Maximum shared memory segments. | |
| kernel.shmmni = 4096 | |
| ################################################################################ | |
| # PROCESS MANAGEMENT | |
| ################################################################################ | |
| # Maximum process ID (process/thread limit). | |
| # Impact: Supports high-concurrency web servers (Nginx workers, thread pools). | |
| # Prevents process creation failures under load. | |
| kernel.pid_max = 131072 | |
| # Maximum threads system-wide. | |
| # Impact: Supports multi-threaded applications (Java, Node.js, Python async). | |
| kernel.threads-max = 524288 | |
| ################################################################################ | |
| # SECURITY | |
| ################################################################################ | |
| # TCP SYN cookies - protect against SYN flood attacks. | |
| net.ipv4.tcp_syncookies = 1 | |
| # Reverse path filtering - prevent IP spoofing. | |
| net.ipv4.conf.all.rp_filter = 1 | |
| net.ipv4.conf.default.rp_filter = 1 | |
| # Disable ICMP redirects - prevent MITM attacks. | |
| net.ipv4.conf.all.accept_redirects = 0 | |
| net.ipv4.conf.default.accept_redirects = 0 | |
| net.ipv4.conf.all.send_redirects = 0 | |
| net.ipv4.conf.default.send_redirects = 0 | |
| # Disable source routing - security risk mitigation. | |
| net.ipv4.conf.all.accept_source_route = 0 | |
| net.ipv4.conf.default.accept_source_route = 0 | |
| net.ipv6.conf.all.accept_source_route = 0 | |
| net.ipv6.conf.default.accept_source_route = 0 | |
| # Secure redirects - only accept from default route gateways. | |
| net.ipv4.conf.all.secure_redirects = 1 | |
| net.ipv4.conf.default.secure_redirects = 1 | |
| # Log martian packets (impossible source addresses). | |
| net.ipv4.conf.all.log_martians = 1 | |
| net.ipv4.conf.default.log_martians = 1 | |
| # Ignore ICMP broadcasts - prevent smurf attacks. | |
| net.ipv4.icmp_echo_ignore_broadcasts = 1 | |
| # Ignore bogus ICMP error responses. | |
| net.ipv4.icmp_ignore_bogus_error_responses = 1 | |
| # TCP TIME-WAIT assassination protection (RFC 1337). | |
| net.ipv4.tcp_rfc1337 = 1 | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ################################################################################ | |
| # /etc/sysctl.d/61-sysctl-old-os.conf | |
| # Performance-Critical Kernel Optimizations for Web + DB Server | |
| # Target: CentOS 6 / RHEL 6 (kernel 2.6.32) | |
| # Workload: High-concurrency web services with database workloads | |
| # Apply with: sysctl --system | |
| ################################################################################ | |
| ######################## | |
| # MEMORY MANAGEMENT | |
| ######################## | |
| # Reduce swapping tendency - keeps hot data in RAM for database performance | |
| # Impact: 10-30% for database workloads, prevents swap-induced latency spikes | |
| vm.swappiness = 10 | |
| # Reduce pressure on VFS cache - keeps directory/inode entries in memory | |
| # Impact: 5-15% for filesystem-heavy workloads, reduces metadata lookups | |
| vm.vfs_cache_pressure = 50 | |
| # Dirty page ratio - start writeback at 15% of RAM | |
| # Impact: Balances write coalescing vs responsiveness, critical for DB write performance | |
| vm.dirty_ratio = 15 | |
| # Background writeback threshold - start at 5% of RAM | |
| # Impact: Prevents write storms, smooths I/O patterns | |
| vm.dirty_background_ratio = 5 | |
| # Minimum free memory reserve (64MB) - prevents low-memory deadlocks | |
| # Impact: System stability under memory pressure | |
| vm.min_free_kbytes = 65536 | |
| # Allow memory overcommit (essential for database fork operations) | |
| # Impact: Enables PostgreSQL/MariaDB to allocate memory efficiently | |
| vm.overcommit_memory = 1 | |
| # Overcommit ratio at 100% - allows allocation equal to physical RAM + swap | |
| # Impact: Prevents allocation failures for database operations | |
| vm.overcommit_ratio = 100 | |
| ######################## | |
| # SHARED MEMORY (Database Performance) | |
| ######################## | |
| # Maximum shared memory segment: 16GB - critical for large database workloads | |
| # Impact: Enables PostgreSQL shared_buffers, MySQL buffer_pool tuning | |
| kernel.shmmax = 17179869184 | |
| # Total shared memory pages: 16GB / 4KB = 4,194,304 pages | |
| # Impact: Controls aggregate shared memory allocation for multiple DB instances | |
| kernel.shmall = 4194304 | |
| # Maximum shared memory segments: 4096 | |
| # Impact: Supports multiple database instances or applications | |
| kernel.shmmni = 4096 | |
| ######################## | |
| # NETWORK CORE (Connection Handling) | |
| ######################## | |
| # Maximum pending connections: 4096 - critical for high-traffic web servers | |
| # Impact: Increases connection acceptance rate, reduces dropped connections | |
| net.core.somaxconn = 4096 | |
| # Maximum packet backlog per interface: 5000 | |
| # Impact: Reduces packet drops under high network load (10-20% on 10Gbps+) | |
| net.core.netdev_max_backlog = 5000 | |
| # Maximum socket receive buffer: 16MB | |
| # Impact: Enables high-throughput transfers on high-latency networks | |
| net.core.rmem_max = 16777216 | |
| # Maximum socket send buffer: 16MB | |
| # Impact: Improves bulk data transmission performance | |
| net.core.wmem_max = 16777216 | |
| ######################## | |
| # TCP/IP STACK (Throughput & Concurrency) | |
| ######################## | |
| # SYN backlog queue: 8192 - defends against SYN floods + accepts bursts | |
| # Impact: Improves connection handling during traffic spikes (10-25% for web) | |
| net.ipv4.tcp_max_syn_backlog = 8192 | |
| # Reuse TIME_WAIT sockets for new connections | |
| # Impact: Dramatically reduces port exhaustion under high connection churn (20-40%) | |
| net.ipv4.tcp_tw_reuse = 1 | |
| # TIME_WAIT socket bucket limit: 262144 | |
| # Impact: Supports high connection turnover rates | |
| net.ipv4.tcp_max_tw_buckets = 262144 | |
| # FIN timeout: 25 seconds (default 60) - faster connection cleanup | |
| # Impact: Reduces TIME_WAIT accumulation, frees resources faster | |
| net.ipv4.tcp_fin_timeout = 25 | |
| # TCP receive buffer: 4KB min, 85KB default, 16MB max | |
| # Impact: Auto-tuning enables optimal bandwidth-delay product utilization | |
| net.ipv4.tcp_rmem = 4096 87380 16777216 | |
| # TCP send buffer: 4KB min, 64KB default, 16MB max | |
| # Impact: Auto-tuning optimizes throughput for varying network conditions | |
| net.ipv4.tcp_wmem = 4096 65536 16777216 | |
| # Enable TCP window scaling (RFC 1323) | |
| # Impact: Essential for high-speed networks (>100Mbps) with latency | |
| net.ipv4.tcp_window_scaling = 1 | |
| # Enable Selective ACKs (SACK) | |
| # Impact: 5-15% throughput improvement on lossy networks | |
| net.ipv4.tcp_sack = 1 | |
| # Disable slow start after idle periods | |
| # Impact: Maintains high throughput after connection pauses (10-30% for long-lived) | |
| net.ipv4.tcp_slow_start_after_idle = 0 | |
| ######################## | |
| # CONNECTION TRACKING (Firewall/NAT) | |
| ######################## | |
| # Maximum tracked connections: 262144 | |
| # Impact: Prevents conntrack table exhaustion under high concurrent connections | |
| net.netfilter.nf_conntrack_max = 262144 | |
| # Established connection timeout: 2 hours | |
| # Impact: Balances memory usage vs connection state preservation | |
| net.netfilter.nf_conntrack_tcp_timeout_established = 7200 | |
| ######################## | |
| # FILE SYSTEM (Concurrency Limits) | |
| ######################## | |
| # Maximum open file descriptors: 2,097,152 | |
| # Impact: Critical for high-concurrency web servers (Nginx, Apache, Tomcat) | |
| fs.file-max = 2097152 | |
| # Maximum async I/O operations: 1,048,576 | |
| # Impact: Essential for database performance (PostgreSQL, MySQL async I/O) | |
| fs.aio-max-nr = 1048576 | |
| ######################## | |
| # SECURITY PARAMETERS (Preserved) | |
| ######################## | |
| # Address space layout randomization | |
| kernel.randomize_va_space = 2 | |
| # Kernel pointer restrictions | |
| kernel.dmesg_restrict = 1 | |
| kernel.kptr_restrict = 1 | |
| # SYN cookies protection (DDoS resistance) | |
| net.ipv4.tcp_syncookies = 1 | |
| # SYN/SYNACK retry limits (prevents resource exhaustion) | |
| net.ipv4.tcp_syn_retries = 2 | |
| net.ipv4.tcp_synack_retries = 2 | |
| # TCP TIME-WAIT assassination protection | |
| net.ipv4.tcp_rfc1337 = 1 | |
| # ARP cache limits (prevents neighbor table overflow) | |
| net.ipv4.neigh.default.gc_thresh1 = 512 | |
| net.ipv4.neigh.default.gc_thresh2 = 1024 | |
| net.ipv4.neigh.default.gc_thresh3 = 2048 | |
| ######################## | |
| # NETWORK SECURITY (IPv4) | |
| ######################## | |
| # Reverse path filtering (anti-spoofing) | |
| net.ipv4.conf.all.rp_filter = 1 | |
| net.ipv4.conf.default.rp_filter = 1 | |
| # Disable ICMP redirects | |
| net.ipv4.conf.all.accept_redirects = 0 | |
| net.ipv4.conf.default.accept_redirects = 0 | |
| net.ipv4.conf.all.send_redirects = 0 | |
| net.ipv4.conf.default.send_redirects = 0 | |
| # Disable source routing | |
| net.ipv4.conf.all.accept_source_route = 0 | |
| net.ipv4.conf.default.accept_source_route = 0 | |
| # Log martian packets (spoofing detection) | |
| net.ipv4.conf.all.log_martians = 1 | |
| net.ipv4.conf.default.log_martians = 1 | |
| # Broadcast ping protection | |
| net.ipv4.icmp_echo_ignore_broadcasts = 1 | |
| # Bogus ICMP error protection | |
| net.ipv4.icmp_ignore_bogus_error_responses = 1 | |
| ######################## | |
| # NETWORK SECURITY (IPv6) | |
| ######################## | |
| # Disable IPv6 redirects and source routing | |
| net.ipv6.conf.all.accept_redirects = 0 | |
| net.ipv6.conf.default.accept_redirects = 0 | |
| net.ipv6.conf.all.accept_source_route = 0 | |
| net.ipv6.conf.default.accept_source_route = 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ################################################################################ | |
| # /etc/sysctl.d/80-k8s.conf | |
| # Performance-Critical Kernel Optimizations for Kubernetes Nodes | |
| # Target: Production K8s clusters with kube-proxy (nftables mode) | |
| # Workload: High-concurrency microservices, inter-pod networking, high connection churn | |
| # Hardware: Enterprise-grade servers (16GB+ RAM, 10Gbps+ network recommended) | |
| # Apply with: sysctl --system | |
| ################################################################################ | |
| ######################## | |
| # MEMORY MANAGEMENT | |
| ######################## | |
| # Reduced swapping - keeps containers in RAM, critical for pod performance | |
| # Impact: 15-30% for containerized workloads, prevents swap death during OOM pressure | |
| vm.swappiness = 10 | |
| # Enable memory overcommit - essential for container memory allocation | |
| # Impact: Prevents container startup failures, enables efficient memory utilization | |
| vm.overcommit_memory = 1 | |
| # Overcommit ratio at 100% - allows allocation equal to physical RAM + swap | |
| # Impact: Balances memory utilization with stability for container workloads | |
| vm.overcommit_ratio = 100 | |
| # Background writeback at 5% of RAM - smooths I/O under heavy container loads | |
| # Impact: 10-20% I/O performance improvement, prevents write storms | |
| vm.dirty_background_ratio = 5 | |
| # Dirty ratio at 10% - aggressive writeback for responsiveness | |
| # Impact: 15-25% I/O performance under database-heavy container workloads | |
| vm.dirty_ratio = 10 | |
| # Writeback every 5 seconds - more frequent writes reduce burst I/O | |
| # Impact: 5-10% smoother I/O patterns, better latency predictability | |
| vm.dirty_writeback_centisecs = 500 | |
| # Reduce VFS cache pressure - keeps container metadata in memory | |
| # Impact: 5-15% performance for filesystem-heavy workloads (container image layers) | |
| vm.vfs_cache_pressure = 50 | |
| # Minimum free memory reserve (128MB) - prevents OOM deadlocks | |
| # Impact: System stability under memory pressure, critical for kubelet stability | |
| vm.min_free_kbytes = 131072 | |
| ######################## | |
| # TCP CONNECTION HANDLING | |
| ######################## | |
| # SYN backlog queue: 8192 - handles connection bursts during service scaling | |
| # Impact: 10-25% connection acceptance rate during pod autoscaling events | |
| net.ipv4.tcp_max_syn_backlog = 8192 | |
| # Reuse TIME_WAIT sockets - accelerates socket recycling for microservices | |
| # Impact: 20-40% for connection-heavy workloads, reduces port exhaustion | |
| net.ipv4.tcp_tw_reuse = 1 | |
| # Maximum TIME_WAIT buckets: 1.44M - accommodates high connection churn | |
| # Impact: When exceeded, the kernel logs "time wait bucket table overflow" and | |
| # resets excess TIME_WAIT sockets (no panic); a higher limit avoids early resets. | |
| net.ipv4.tcp_max_tw_buckets = 1440000 | |
| # FIN timeout: 30s - faster connection cleanup than default (60s) | |
| # Impact: 10-20% faster resource recovery for short-lived connections | |
| net.ipv4.tcp_fin_timeout = 30 | |
| # Connection queue limit: 65535 - accommodates bursty traffic patterns | |
| # Impact: 15-30% reduction in connection drops for service-to-service traffic | |
| net.core.somaxconn = 65535 | |
| # Maximum orphaned sockets: 262144 - handles socket leaks gracefully | |
| # Impact: System stability under connection stress | |
| net.ipv4.tcp_max_orphans = 262144 | |
| ######################## | |
| # TCP CONGESTION & THROUGHPUT | |
| ######################## | |
| # BBR congestion control - superior for cloud/lossy networks | |
| # Impact: 10-40% throughput improvement vs Cubic for inter-datacenter traffic | |
| net.ipv4.tcp_congestion_control = bbr | |
| # TCP window scaling (RFC 1323) - enables >64KB windows | |
| # Impact: 5-15% throughput on high-latency networks (>100ms) | |
| net.ipv4.tcp_window_scaling = 1 | |
| # Selective ACKs (SACK) - recovers faster from packet loss | |
| # Impact: 5-20% throughput improvement on lossy networks | |
| net.ipv4.tcp_sack = 1 | |
| # Disable slow start after idle - maintains high throughput | |
| # Impact: 10-30% for long-lived connections with idle periods (gRPC, WebSocket) | |
| net.ipv4.tcp_slow_start_after_idle = 0 | |
| # TCP Fast Open (Cookie Mode) - reduces connection establishment latency | |
| # Impact: 5-15% latency reduction for microservice communication | |
| net.ipv4.tcp_fastopen = 3 | |
| ######################## | |
| # NETWORK BUFFERS & QDISC | |
| ######################## | |
| # Interface packet backlog: 30,000 - handles bursty traffic on 10Gbps+ | |
| # Impact: 15-30% reduction in packet drops under high network load | |
| net.core.netdev_max_backlog = 30000 | |
| # Maximum socket receive buffer: 16MB | |
| # Impact: Enables high-throughput transfers on high-latency networks | |
| net.core.rmem_max = 16777216 | |
| # Maximum socket send buffer: 16MB | |
| # Impact: Improves bulk data transmission performance | |
| net.core.wmem_max = 16777216 | |
| # TCP receive buffer: 4KB min, 85KB default, 16MB max | |
| # Impact: Auto-tuning optimizes throughput for varying network conditions | |
| net.ipv4.tcp_rmem = 4096 87380 16777216 | |
| # TCP send buffer: 4KB min, 64KB default, 16MB max | |
| # Impact: Auto-tuning enables optimal bandwidth-delay product utilization | |
| net.ipv4.tcp_wmem = 4096 65536 16777216 | |
| # Fair Queueing (fq) scheduler - recommended pacing qdisc for BBR | |
| # (strictly required only before kernel 4.13; BBR uses internal pacing since) | |
| net.core.default_qdisc = fq | |
| ######################## | |
| # CONNECTION TRACKING | |
| ######################## | |
| # Maximum tracked connections: 2,097,152 | |
| # Impact: Prevents conntrack exhaustion in large clusters (50+ nodes, 1000+ pods) | |
| net.netfilter.nf_conntrack_max = 2097152 | |
| # Established connection timeout: 24 hours | |
| # Impact: Balances memory usage with state preservation for long-lived connections | |
| net.netfilter.nf_conntrack_tcp_timeout_established = 86400 | |
| ######################## | |
| # ROUTING & BRIDGE (K8s Required) | |
| ######################## | |
| # Enable IPv4 forwarding - required for pod-to-pod communication | |
| # Impact: Essential functionality for Kubernetes networking | |
| net.ipv4.ip_forward = 1 | |
| # Bridge firewall integration - required for kube-proxy network policies | |
| # Impact: Essential functionality for K8s network policies | |
| net.bridge.bridge-nf-call-iptables = 1 | |
| net.bridge.bridge-nf-call-ip6tables = 1 | |
| net.bridge.bridge-nf-call-arptables = 1 | |
| # IPv6 forwarding - required for dual-stack K8s clusters | |
| # Impact: Essential functionality for IPv6 support | |
| net.ipv6.conf.all.forwarding = 1 | |
| net.ipv6.conf.default.forwarding = 1 | |
| ######################## | |
| # ARP CACHE | |
| ######################## | |
| # ARP cache thresholds - prevent neighbor table overflow in large clusters | |
| # Impact: Critical stability parameter for clusters with 100+ nodes | |
| net.ipv4.neigh.default.gc_thresh1 = 2048 | |
| net.ipv4.neigh.default.gc_thresh2 = 4096 | |
| net.ipv4.neigh.default.gc_thresh3 = 8192 | |
| ######################## | |
| # FILE SYSTEM & LIMITS | |
| ######################## | |
| # Maximum open file descriptors: 2,097,152 | |
| # Impact: Prevents resource exhaustion for container runtime and high-concurrency services | |
| fs.file-max = 2097152 | |
| # Inotify watches: 524,288 - monitors container filesystem changes | |
| # Impact: Critical for Kubernetes components (kubelet, containerd) and sidecars | |
| fs.inotify.max_user_watches = 524288 | |
| # Maximum async I/O operations: 1,048,576 | |
| # Impact: Enables high-performance storage for container volumes | |
| fs.aio-max-nr = 1048576 | |
| ######################## | |
| # NETWORK PERFORMANCE | |
| ######################## | |
| # Packet processing budget: 600 packets per NAPI cycle | |
| # Impact: 10-20% throughput improvement on multi-core servers | |
| net.core.netdev_budget = 600 | |
| # Budget time: 5ms per NAPI cycle | |
| # Impact: Balances throughput with latency for interactive workloads | |
| net.core.netdev_budget_usecs = 5000 | |
| ######################## | |
| # SECURITY | |
| ######################## | |
| # Address space layout randomization | |
| kernel.randomize_va_space = 2 | |
| # Kernel pointer restrictions | |
| kernel.dmesg_restrict = 1 | |
| kernel.kptr_restrict = 1 | |
| # ptrace scope - prevents attaching to non-child processes | |
| kernel.yama.ptrace_scope = 1 | |
| # Disable core dumps (production security) | |
| kernel.core_pattern = |/bin/false | |
| # SYN cookies protection (DDoS resistance) | |
| net.ipv4.tcp_syncookies = 1 | |
| # SYN/SYNACK retry limits (prevents resource exhaustion) | |
| net.ipv4.tcp_syn_retries = 2 | |
| net.ipv4.tcp_synack_retries = 2 | |
| # TCP TIME-WAIT assassination protection | |
| net.ipv4.tcp_rfc1337 = 1 | |
| # Reverse path filtering (anti-spoofing) | |
| net.ipv4.conf.all.rp_filter = 1 | |
| net.ipv4.conf.default.rp_filter = 1 | |
| # Disable ICMP redirects (IPv4 & IPv6) | |
| net.ipv4.conf.all.accept_redirects = 0 | |
| net.ipv4.conf.default.accept_redirects = 0 | |
| net.ipv4.conf.all.send_redirects = 0 | |
| net.ipv4.conf.default.send_redirects = 0 | |
| net.ipv6.conf.all.accept_redirects = 0 | |
| net.ipv6.conf.default.accept_redirects = 0 | |
| # Disable source routing | |
| net.ipv4.conf.all.accept_source_route = 0 | |
| net.ipv4.conf.default.accept_source_route = 0 | |
| net.ipv6.conf.all.accept_source_route = 0 | |
| net.ipv6.conf.default.accept_source_route = 0 | |
| # Log martian packets (spoofing detection) | |
| net.ipv4.conf.all.log_martians = 1 | |
| net.ipv4.conf.default.log_martians = 1 | |
| # Broadcast ping protection | |
| net.ipv4.icmp_echo_ignore_broadcasts = 1 | |
| # Bogus ICMP error protection | |
| net.ipv4.icmp_ignore_bogus_error_responses = 1 | |
| ######################## | |
| # KUBERNETES SPECIFIC TUNING | |
| ######################## | |
| # MTU probing enables Path MTU Discovery for container networks | |
| # Impact: Prevents fragmentation issues in overlay networks (Calico, Cilium, Flannel) | |
| net.ipv4.tcp_mtu_probing = 1 | |
| # Don't save TCP metrics - reduces kernel overhead for short-lived connections | |
| # Impact: 5-10% performance for microservice communication patterns | |
| net.ipv4.tcp_no_metrics_save = 1 | |
| # TCP autocorking - reduces small packet overhead | |
| # Impact: 5-15% throughput improvement for RPC workloads | |
| net.ipv4.tcp_autocorking = 1 | |
| # Port range expansion - accommodates high connection counts | |
| # Impact: Prevents ephemeral port exhaustion at scale | |
| net.ipv4.ip_local_port_range = 1024 65535 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ################################################################################ | |
| # /etc/sysctl.d/80-pve.conf | |
| # Performance-Critical Proxmox VE Host Tuning | |
| # Optimized for: Virtualization host with high-concurrency VMs/containers | |
| # Workload: Database, web services, network-intensive applications | |
| # Hardware: 64GB+ RAM, multi-core CPU, high-speed storage | |
| # Apply with: sysctl --system | |
| ################################################################################ | |
| ######################## | |
| # MEMORY MANAGEMENT | |
| ######################## | |
| # CRITICAL: Strongly prefer dropping caches over swapping VM memory | |
| # Impact: >10% for VM workloads by preventing swap thrashing and maintaining performance | |
| vm.swappiness = 10 | |
| # CRITICAL: Tend to keep dentry/inode caches longer for VM disk access patterns | |
| # Impact: 5-10% reduction in storage I/O for frequently-accessed VM files | |
| vm.vfs_cache_pressure = 50 | |
| # CRITICAL: Limit dirty pages to prevent I/O spikes that stall VMs | |
| # Impact: >10% by avoiding long synchronous write pauses that freeze VMs | |
| vm.dirty_ratio = 10 | |
| vm.dirty_background_ratio = 5 | |
| # Maintain minimum free memory to prevent OOM under burst loads | |
| # Adjust: 128MB for 16GB, 256MB for 32GB, 512MB for 64GB+ systems | |
| # Impact: System stability - prevents catastrophic OOM situations | |
| vm.min_free_kbytes = 524288 | |
| # Required for containerized workloads (Elasticsearch, databases) | |
| # Impact: Enables specific applications to function without errors | |
| vm.max_map_count = 262144 | |
| ######################## | |
| # TCP & NETWORK STACK | |
| ######################## | |
| # BBR congestion control - better throughput for high-BDP networks | |
| # Impact: 10-40% improvement in network throughput, especially over WAN/high-latency links | |
| net.ipv4.tcp_congestion_control = bbr | |
| net.core.default_qdisc = fq | |
| # TCP Fast Open - reduces latency for repeated connections | |
| # Impact: 5-15% latency reduction for web/database clients with persistent connections | |
| net.ipv4.tcp_fastopen = 3 | |
| # CAUTION: 0 disables TFO blackhole detection entirely - if a middlebox drops | |
| # TFO SYNs, the kernel will not back off to plain SYNs (kernel default: 3600 s) | |
| net.ipv4.tcp_fastopen_blackhole_timeout_sec = 0 | |
| # Disable TCP slow start after idle (critical for long-lived DB connections) | |
| # Impact: >5% throughput for databases, Redis, and other persistent connections | |
| net.ipv4.tcp_slow_start_after_idle = 0 | |
| # Optimize connection queues for high-concurrency workloads | |
| # Impact: Enables handling of sudden connection spikes without drops | |
| net.core.somaxconn = 8192 | |
| net.ipv4.tcp_max_syn_backlog = 8192 | |
| net.core.netdev_max_backlog = 16384 | |
| # Large TCP buffer ceilings for high-throughput transfers | |
| # Impact: 10-30% improvement for large file transfers, backups, VM migrations | |
| # NOTE: rmem_max/wmem_max only raise the setsockopt() ceiling; TCP autotuning | |
| # ranges are set via net.ipv4.tcp_rmem / net.ipv4.tcp_wmem, not these keys | |
| net.core.rmem_max = 33554432 | |
| net.core.wmem_max = 33554432 | |
| # tcp_mem is measured in PAGES (4 KiB), not bytes: 262144 pages = 1 GiB cap. | |
| # The kernel auto-sizes this from RAM at boot; overriding it this low on a | |
| # 64 GB host can throttle TCP stack memory - verify before keeping this line | |
| net.ipv4.tcp_mem = 65536 131072 262144 | |
| # Free socket resources faster: tcp_fin_timeout shortens the FIN-WAIT-2 hold | |
| # (it does not change TIME_WAIT, which is a fixed 60 s on Linux), and | |
| # tcp_tw_reuse lets outbound connections safely reuse TIME_WAIT sockets | |
| # Impact: Higher connection turnover capacity for web/proxy servers | |
| net.ipv4.tcp_fin_timeout = 15 | |
| net.ipv4.tcp_tw_reuse = 1 | |
| # Maximum ephemeral ports for high-concurrency outbound connections | |
| # Impact: Prevents port exhaustion under extreme load | |
| net.ipv4.ip_local_port_range = 1024 65535 | |
| # Reduce latency on write operations by sending smaller buffers more frequently | |
| # Impact: 5-10% latency reduction for interactive protocols | |
| net.ipv4.tcp_notsent_lowat = 16384 | |
| ######################## | |
| # CONNECTION TRACKING | |
| ######################## | |
| # Maximum connection tracking entries for high-concurrency environments | |
| # Impact: Prevents connection table exhaustion with many containers/VMs | |
| # NOTE: net.netfilter.* keys exist only once the nf_conntrack module is | |
| # loaded; "sysctl --system" run before that will report them as unknown keys | |
| net.netfilter.nf_conntrack_max = 1048576 | |
| # Optimized timeouts to free resources faster while maintaining stability | |
| # Impact: Better memory utilization and higher connection throughput | |
| net.netfilter.nf_conntrack_tcp_timeout_established = 43200 | |
| net.netfilter.nf_conntrack_tcp_timeout_close_wait = 60 | |
| net.netfilter.nf_conntrack_tcp_timeout_fin_wait = 120 | |
| net.netfilter.nf_conntrack_tcp_timeout_time_wait = 120 | |
| net.netfilter.nf_conntrack_udp_timeout = 30 | |
| net.netfilter.nf_conntrack_udp_timeout_stream = 120 | |
| net.netfilter.nf_conntrack_generic_timeout = 120 | |
| net.netfilter.nf_conntrack_icmp_timeout = 30 | |
| ######################## | |
| # SYSTEM RESOURCE LIMITS | |
| ######################## | |
| # Maximum open file handles - critical for databases and high-concurrency web servers | |
| # Impact: Prevents "too many open files" errors under heavy load | |
| fs.file-max = 2097152 | |
| # Maximum processes and threads - essential for container orchestration | |
| # Impact: Enables running thousands of containers/processes without hitting limits | |
| kernel.pid_max = 4194304 | |
| kernel.threads-max = 524288 | |
| # Shared memory limits - required for databases (PostgreSQL, Oracle) and large VMs | |
| # Impact: Enables databases to allocate necessary shared memory segments | |
| # NOTE: sysctl.conf supports full-line comments only; a trailing "# ..." after | |
| # a value is passed through as part of the value and the write can fail | |
| # shmmax: maximum single segment size in bytes (68719476736 = 64 GiB) | |
| kernel.shmmax = 68719476736 | |
| # shmall: total shared memory in 4 KiB pages. 4294967296 pages = 16 TiB, an | |
| # effectively unlimited ceiling (64 GiB would be 16777216 pages) | |
| kernel.shmall = 4294967296 | |
| # Semaphore limits - adjusts IPC capacity for concurrent processes | |
| # Impact: Improves database and application IPC throughput | |
| kernel.sem = 250 32000 100 128 | |
| ######################## | |
| # INOTIFY | |
| ######################## | |
| # Inotify limits for container orchestration and monitoring tools | |
| # Impact: Prevents monitoring failures in containerized environments | |
| fs.inotify.max_user_instances = 512 | |
| fs.inotify.max_user_watches = 524288 | |
| ######################## | |
| # BPF JIT OPTIMIZATION | |
| ######################## | |
| # Enable BPF JIT compiler for eBPF-based monitoring, networking (Cilium), and security tools | |
| # Impact: 20-50% performance improvement for eBPF programs (monitoring, observability, service mesh) | |
| net.core.bpf_jit_enable = 1 | |
| # Harden the JIT (constant blinding) for all users - maintains security while | |
| # enabling performance, at the cost of some of the JIT speedup | |
| net.core.bpf_jit_harden = 2 | |
| ######################## | |
| # REQUIRED PROXMOX VE FUNCTIONALITY | |
| ######################## | |
| # IP forwarding for VM/container routing and NAT | |
| net.ipv4.ip_forward = 1 | |
| net.ipv6.conf.all.forwarding = 1 | |
| # Bridge netfilter - REQUIRED for Proxmox VE firewall on VM bridges | |
| # NOTE: net.bridge.* keys exist only after the br_netfilter module is loaded; | |
| # ensure it is loaded at boot (e.g. /etc/modules-load.d/) or these lines warn | |
| net.bridge.bridge-nf-call-iptables = 1 | |
| net.bridge.bridge-nf-call-ip6tables = 1 | |
| net.bridge.bridge-nf-call-arptables = 1 | |
| ######################## | |
| # SECURITY | |
| ######################## | |
| # SYN cookies - protect against SYN flood attacks | |
| net.ipv4.tcp_syncookies = 1 | |
| # Reverse path filtering - prevent IP spoofing | |
| net.ipv4.conf.all.rp_filter = 1 | |
| net.ipv4.conf.default.rp_filter = 1 | |
| # Disable ICMP redirects - prevent MITM attacks | |
| net.ipv4.conf.all.accept_redirects = 0 | |
| net.ipv4.conf.default.accept_redirects = 0 | |
| net.ipv6.conf.all.accept_redirects = 0 | |
| net.ipv6.conf.default.accept_redirects = 0 | |
| # Disable source routing | |
| net.ipv4.conf.all.accept_source_route = 0 | |
| net.ipv4.conf.default.accept_source_route = 0 | |
| net.ipv6.conf.all.accept_source_route = 0 | |
| net.ipv6.conf.default.accept_source_route = 0 | |
| # Ignore ICMP echo broadcasts | |
| net.ipv4.icmp_echo_ignore_broadcasts = 1 | |
| net.ipv4.icmp_ignore_bogus_error_responses = 1 | |
| # TCP RFC 1337 protection | |
| net.ipv4.tcp_rfc1337 = 1 | |
| # Kernel hardening | |
| kernel.kptr_restrict = 1 | |
| kernel.dmesg_restrict = 1 | |
| kernel.perf_event_paranoid = 2 | |
| kernel.randomize_va_space = 2 | |
| kernel.core_pattern = /dev/null | |
| kernel.yama.ptrace_scope = 1 | |
| ######################## | |
| # MINOR PERFORMANCE TUNING | |
| ######################## | |
| # Disable NMI watchdog for slight CPU overhead reduction | |
| # Impact: <2% CPU savings on heavily loaded systems | |
| kernel.nmi_watchdog = 0 | |
| # Huge pages - set to non-zero if using VMs with static hugepage backing | |
| # Impact: 5-10% for memory-intensive VMs when properly configured | |
| vm.nr_hugepages = 0 | |
| # ZFS tuning - uncomment and adjust if using ZFS storage backend | |
| # Impact: Can improve ZFS performance by limiting ARC cache | |
| # vfs.zfs.arc_max = 10737418240 | |
| # vfs.zfs.arc_min = 1073741824 | |
| # vfs.zfs.zio.use_uma = 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment