Skip to content

Instantly share code, notes, and snippets.

@Lulalaby
Last active March 8, 2026 23:13
Show Gist options
  • Select an option

  • Save Lulalaby/16ffc1263a7e60e438a11978e062f3a9 to your computer and use it in GitHub Desktop.

Select an option

Save Lulalaby/16ffc1263a7e60e438a11978e062f3a9 to your computer and use it in GitHub Desktop.
VoiceTestBot
// VoiceTestBot.cs
// Drop this file into a console app that references DisCatSharp, DisCatSharp.Voice,
// DisCatSharp.Voice.Natives, and DisCatSharp.CommandsNext.
//
// Required NuGet packages:
// DisCatSharp
// DisCatSharp.Voice
// DisCatSharp.Voice.Natives
// DisCatSharp.CommandsNext
//
// Set environment variable DISCORD_TOKEN before running.
// Optional: set DISCORD_PREFIX (default: "!")
//
// Available commands:
// !join [channel] — join caller's voice channel or an explicit voice/stage channel
// !leave — disconnect from voice
// !play <path> — stream an audio file through ffmpeg (requires ffmpeg in PATH)
// !stop — stop playback
// !pause — pause playback
// !resume — resume playback
// !listen — toggle voice receive (dumps PCM packet info to console)
// !recstart [fmt] — start recording incoming audio (fmt: wav/mp3)
// !recstop — stop recording and finalize output file
// !recstatus — print recording status
// !dave — print current DAVE encryption status
// !vdebug [mode] — voice debug logs for current connection (mode: on/off/status)
// !ping — print WebSocket + UDP latency
using System.Diagnostics;
using System.Collections.Concurrent;
using System.Buffers.Binary;
using System.Threading;
using DisCatSharp;
using DisCatSharp.CommandsNext;
using DisCatSharp.CommandsNext.Attributes;
using DisCatSharp.Entities;
using DisCatSharp.Enums;
using DisCatSharp.EventArgs;
using DisCatSharp.Voice;
using DisCatSharp.Voice.EventArgs;
using Microsoft.Extensions.Logging;
using Serilog;
// ─────────────────────────────────────────────
// Entry point
// ─────────────────────────────────────────────
// Boot sequence: configure Serilog → build the Discord client → enable the
// voice extension and CommandsNext → connect → block forever.
var logDirectory = Path.Combine(Environment.CurrentDirectory, "logs");
Directory.CreateDirectory(logDirectory);
var logPath = Path.Combine(logDirectory, "voice_test_bot.log");
// Verbose root logger with console, debugger, and hourly-rolling file sinks.
Log.Logger = new LoggerConfiguration()
    .MinimumLevel.Verbose()
    .WriteTo.Console(outputTemplate: "[{Timestamp:HH:mm:ss} {Level:u3}] {SourceContext} - {Message:lj}{NewLine}{Exception}")
    .WriteTo.Debug(outputTemplate: "[{Timestamp:HH:mm:ss} {Level:u3}] {SourceContext} - {Message:lj}{NewLine}{Exception}")
    .WriteTo.File(logPath, rollingInterval: RollingInterval.Hour, outputTemplate: "[{Timestamp:HH:mm:ss} {Level:u3}] {SourceContext} - {Message:lj}{NewLine}{Exception}")
    .CreateLogger();
// Fail fast when no token is configured; command prefix defaults to "!".
var token = Environment.GetEnvironmentVariable("DISCORD_TOKEN") ?? throw new InvalidOperationException("DISCORD_TOKEN not set");
var prefix = Environment.GetEnvironmentVariable("DISCORD_PREFIX") ?? "!";
var client = new DiscordClient(new DiscordConfiguration
{
    Token = token,
    TokenType = TokenType.Bot,
    // NOTE(review): MessageContent is typically a privileged intent — confirm
    // it is also enabled for the bot in the Discord developer portal.
    Intents = DiscordIntents.AllUnprivileged | DiscordIntents.MessageContent,
    MinimumLogLevel = LogLevel.Trace,
    // dispose: false — the Serilog root logger outlives the client.
    LoggerFactory = new Serilog.Extensions.Logging.SerilogLoggerFactory(Log.Logger, dispose: false),
});
// ── Voice ──────────────────────────────────────
client.UseVoice(new VoiceConfiguration
{
    EnableIncoming = true, // receive incoming audio
    MaxDaveProtocolVersion = 1, // enable DAVE E2EE (set to 0 to test without DAVE)
    EnableDebugLogging = true, // toggles all debug/trace logs inside DisCatSharp.Voice
    DavePendingAudioBehavior = DavePendingAudioBehavior.PassThrough // set Drop to block send until DAVE is Active
});
// ── Commands ───────────────────────────────────
var commands = client.UseCommandsNext(new CommandsNextConfiguration
{
    StringPrefixes = [prefix],
    EnableDms = false,
});
commands.RegisterCommands<VoiceCommands>();
// ── Global voice diagnostics ───────────────────
client.Ready += (c, e) =>
{
    c.Logger.LogInformation("[Bot] Ready as {User}", c.CurrentUser.Username);
    return Task.CompletedTask;
};
await client.ConnectAsync();
// Park the main task forever; the process exits only on Ctrl+C / kill.
await Task.Delay(Timeout.Infinite);
// ─────────────────────────────────────────────
// Voice command module
// ─────────────────────────────────────────────
public sealed class VoiceCommands : BaseCommandModule
{
    // NOTE(review): all state below is static and process-wide, so the module
    // is only correct for one guild at a time (as the _ffmpeg comment below
    // acknowledges) — confirm before running in multiple guilds.
    // Whether receive logging is active (global switch, not per guild).
    private static bool _listenActive;
    // Per-SSRC packet statistics collected by OnVoiceReceived.
    private static readonly ConcurrentDictionary<uint, ReceiveDiagnostics> _receiveDiagnostics = new();
    // Guards _recorder swaps; recorder methods do their own internal locking.
    private static readonly Lock _recorderSync = new();
    private static ReceiveRecorder? _recorder;
    // Track the current ffmpeg process per guild (single-guild test bot).
    private static Process? _ffmpeg;
    // Cancels the in-flight StreamFileAsync pipeline.
    private static CancellationTokenSource? _playCts;
    // ── !join ──────────────────────────────────
    // Joins the explicitly given channel, or the caller's current voice channel.
    [Command("join"), Description("Join your voice channel, or provide a target voice/stage channel.")]
    public async Task JoinAsync(CommandContext ctx, [Description("Optional target channel mention/id/name.")] DiscordChannel? channel = null)
    {
        var targetChannel = channel ?? ctx.Member?.VoiceState?.Channel;
        if (targetChannel is null)
        {
            await ctx.RespondAsync("You are not in a voice channel. Use `!join <voice-channel>` to pick one explicitly.");
            return;
        }
        if (targetChannel.Type is not (ChannelType.Voice or ChannelType.Stage))
        {
            await ctx.RespondAsync($"`{targetChannel.Name}` is not a voice or stage channel.");
            return;
        }
        var voice = ctx.Client.GetVoice();
        var conn = await voice.ConnectAsync(targetChannel);
        // ── Wire per-connection events ──────────
        // NOTE(review): handlers are attached on every !join and never detached
        // (not even in !leave) — confirm ConnectAsync always returns a fresh
        // connection object, otherwise these stack and fire multiple times.
        conn.UserSpeaking += OnUserSpeaking;
        conn.UserJoined += OnUserJoined;
        conn.UserLeft += OnUserLeft;
        conn.VoiceSocketErrored += OnSocketError;
        conn.VoiceReceived += OnVoiceReceived;
        ctx.Client.Logger.LogInformation(
            "[Voice] Connected to {Channel} in {Guild}", targetChannel.Name, targetChannel.Guild.Name);
        await ctx.RespondAsync($"✅ Joined **{targetChannel.Name}**. DAVE is negotiated automatically — check logs for encryption status.");
    }
// ── !leave ─────────────────────────────────
[Command("leave"), Description("Disconnect from voice.")]
public async Task LeaveAsync(CommandContext ctx)
{
var voice = ctx.Client.GetVoice();
var conn = voice.GetConnection(ctx.Guild);
if (conn is null)
{
await ctx.RespondAsync("Not connected.");
return;
}
StopFfmpeg();
StopRecorder();
conn.Disconnect();
await ctx.RespondAsync("👋 Disconnected.");
}
    // ── !play <path> ───────────────────────────
    // Resolves the given path and streams it through ffmpeg into the voice
    // connection. Any previous playback is stopped first.
    [Command("play"), Description("Play an audio file (requires ffmpeg in PATH).")]
    public async Task PlayAsync(CommandContext ctx, [RemainingText, Description("Path or URL to audio file.")] string path)
    {
        var voice = ctx.Client.GetVoice();
        var conn = voice.GetConnection(ctx.Guild);
        if (conn is null)
        {
            await ctx.RespondAsync("Not connected — use `!join` first.");
            return;
        }
        // NOTE(review): despite the parameter description, URLs cannot pass
        // TryResolveAudioPath (it probes with File.Exists) — local files only.
        if (!TryResolveAudioPath(path, out var resolvedPath))
        {
            await ctx.RespondAsync(
                "File not found.\n" +
                $"• input: `{path}`\n" +
                $"• cwd: `{Environment.CurrentDirectory}`\n" +
                $"• base: `{AppContext.BaseDirectory}`");
            return;
        }
        // Cancel/kill any previous playback before starting a new one.
        StopFfmpeg();
        _playCts = new CancellationTokenSource();
        await ctx.RespondAsync($"▶️ Playing `{Path.GetFileName(resolvedPath)}`…");
        // Fire-and-forget so the command returns immediately.
        // NOTE(review): a concurrent !stop can null _playCts between the
        // assignment above and the read below — rare race, worth confirming.
        _ = Task.Run(() => StreamFileAsync(conn, resolvedPath, _playCts.Token), _playCts.Token);
    }
private static bool TryResolveAudioPath(string path, out string resolvedPath)
{
resolvedPath = path;
if (string.IsNullOrWhiteSpace(path))
return false;
// Normalize accidental shell-style quotes from command input.
var trimmed = path.Trim().Trim('"');
if (Path.IsPathRooted(trimmed))
{
if (File.Exists(trimmed))
{
resolvedPath = trimmed;
return true;
}
return false;
}
var candidates = new[]
{
trimmed,
Path.Combine(Environment.CurrentDirectory, trimmed),
Path.Combine(AppContext.BaseDirectory, trimmed)
};
foreach (var candidate in candidates)
{
if (!File.Exists(candidate))
continue;
resolvedPath = candidate;
return true;
}
return false;
}
private static async Task StreamFileAsync(VoiceConnection conn, string path, CancellationToken ct)
{
var transmit = conn.GetTransmitSink();
var log = Log.Logger;
_ffmpeg = Process.Start(new ProcessStartInfo
{
FileName = "ffmpeg",
Arguments = $"-hide_banner -loglevel error -i \"{path}\" -ac 2 -f s16le -ar 48000 pipe:1",
RedirectStandardOutput = true,
RedirectStandardError = true,
UseShellExecute = false,
})!;
// Drain ffmpeg stderr asynchronously so it never blocks the audio pipe.
var localFfmpeg = _ffmpeg;
_ = Task.Run(async () =>
{
string? line;
while ((line = await localFfmpeg.StandardError.ReadLineAsync().ConfigureAwait(false)) is not null)
log.Error("[VoiceTest] ffmpeg stderr: {Line}", line);
}, CancellationToken.None);
try
{
await conn.SendSpeakingAsync(SpeakingFlags.Microphone);
var pcm = _ffmpeg.StandardOutput.BaseStream;
var buf = new byte[transmit.SampleLength];
int read;
var totalBytes = 0;
while (!ct.IsCancellationRequested &&
(read = await pcm.ReadAsync(buf.AsMemory(0, buf.Length), ct)) > 0)
{
totalBytes += read;
log.Debug("[VoiceTest] ffmpeg produced {Bytes} bytes of PCM (total: {Total})", read, totalBytes);
await transmit.WriteAsync(buf.AsMemory(0, read), ct);
}
log.Information("[VoiceTest] PCM stream ended — {Total} bytes fed to transmit sink", totalBytes);
await transmit.FlushAsync(ct);
await conn.WaitForPlaybackFinishAsync();
}
catch (OperationCanceledException) { /* stopped intentionally */ }
catch (Exception ex)
{
log.Error(ex, "[VoiceTest] Exception in StreamFileAsync for path={Path}", path);
}
finally
{
await conn.SendSpeakingAsync(SpeakingFlags.NotSpeaking);
StopFfmpeg();
}
}
// ── !stop ──────────────────────────────────
[Command("stop"), Description("Stop playback.")]
public async Task StopAsync(CommandContext ctx)
{
StopFfmpeg();
await ctx.RespondAsync("⏹️ Stopped.");
}
// ── !pause ─────────────────────────────────
[Command("pause"), Description("Pause playback.")]
public async Task PauseAsync(CommandContext ctx)
{
var voice = ctx.Client.GetVoice();
var conn = voice.GetConnection(ctx.Guild);
conn?.Pause();
await ctx.RespondAsync("⏸️ Paused.");
}
// ── !resume ────────────────────────────────
[Command("resume"), Description("Resume playback.")]
public async Task ResumeAsync(CommandContext ctx)
{
var voice = ctx.Client.GetVoice();
var conn = voice.GetConnection(ctx.Guild);
if (conn is not null)
await conn.ResumeAsync();
await ctx.RespondAsync("▶️ Resumed.");
}
// ── !listen ────────────────────────────────
[Command("listen"), Description("Toggle receive logging (prints packet info to console).")]
public async Task ListenAsync(CommandContext ctx)
{
_listenActive = !_listenActive;
await ctx.RespondAsync(_listenActive
? "🎧 Receive logging **ON** — incoming packets will be printed to console."
: "🔇 Receive logging **OFF**.");
}
    // ── !recstart [wav|mp3] ───────────────────
    // Starts a new ReceiveRecorder (or reports the one already running) and
    // force-enables receive logging so packet flow is visible.
    [Command("recstart"), Description("Start recording incoming audio to logs/recordings (wav/mp3).")]
    public async Task RecStartAsync(CommandContext ctx, [Description("Output format: wav or mp3 (default wav).")] string format = "wav")
    {
        var voice = ctx.Client.GetVoice();
        var conn = voice.GetConnection(ctx.Guild);
        if (conn is null)
        {
            await ctx.RespondAsync("Not connected — use `!join` first.");
            return;
        }
        if (!TryParseRecordingFormat(format, out var recordingFormat))
        {
            await ctx.RespondAsync("Invalid format. Use `wav` or `mp3`.");
            return;
        }
        // Create-or-report is decided under the lock; the responses are sent
        // afterwards so we never await while holding _recorderSync.
        ReceiveRecorder recorder;
        var alreadyRunningName = string.Empty;
        lock (_recorderSync)
        {
            if (_recorder is not null)
            {
                alreadyRunningName = Path.GetFileName(_recorder.TargetPath);
                recorder = _recorder;
            }
            else
            {
                var recordingsDirectory = Path.Combine(Environment.CurrentDirectory, "logs", "recordings");
                Directory.CreateDirectory(recordingsDirectory);
                var timestamp = DateTimeOffset.UtcNow.ToString("yyyyMMdd_HHmmss");
                var basePath = Path.Combine(recordingsDirectory, $"voice_{ctx.Guild.Id}_{timestamp}");
                recorder = new ReceiveRecorder(basePath, recordingFormat);
                _recorder = recorder;
            }
        }
        if (!string.IsNullOrEmpty(alreadyRunningName))
        {
            await ctx.RespondAsync($"Recording already running: `{alreadyRunningName}`");
            return;
        }
        // Recording implies receive logging.
        _listenActive = true;
        await ctx.RespondAsync(
            $"⏺️ Recording started.\n" +
            $"• format: `{recordingFormat.ToString().ToLowerInvariant()}`\n" +
            $"• output: `{recorder.TargetPath}`\n" +
            "Recorder mixes all received participant audio into one timeline.");
    }
    // ── !recstop ───────────────────────────────
    // Detaches and finalizes the active recorder, then reports its summary.
    [Command("recstop"), Description("Stop recording and finalize output file.")]
    public async Task RecStopAsync(CommandContext ctx)
    {
        var summary = StopRecorder();
        if (summary is null)
        {
            await ctx.RespondAsync("No active recording.");
            return;
        }
        await ctx.RespondAsync(
            "⏹️ Recording stopped.\n" +
            $"• file: `{summary.OutputPath}`\n" +
            $"• participants: `{summary.ParticipantCount}`\n" +
            $"• frames: `{summary.FrameCount}`\n" +
            $"• inserted silence frames: `{summary.InsertedSilenceFrames}`\n" +
            $"• captured bytes: `{summary.PcmBytes}`\n" +
            $"• participant stems: `{summary.StemDirectory ?? "n/a"}`");
    }
    // ── !recstatus ─────────────────────────────
    // Prints a point-in-time snapshot of the running recorder, if any.
    [Command("recstatus"), Description("Show recording status.")]
    public async Task RecStatusAsync(CommandContext ctx)
    {
        // Snapshot the recorder reference under the lock, await outside it.
        ReceiveRecorder? recorder;
        lock (_recorderSync)
            recorder = _recorder;
        if (recorder is null)
        {
            await ctx.RespondAsync("No active recording.");
            return;
        }
        var snapshot = recorder.GetSnapshot();
        await ctx.RespondAsync(
            "🎙️ Recording active.\n" +
            $"• target: `{snapshot.TargetPath}`\n" +
            $"• participants: `{snapshot.ParticipantCount}`\n" +
            $"• frames: `{snapshot.FrameCount}`\n" +
            $"• inserted silence frames: `{snapshot.InsertedSilenceFrames}`\n" +
            $"• captured bytes: `{snapshot.PcmBytes}`");
    }
    // ── !dave ──────────────────────────────────
    // Reports configuration-level DAVE (E2EE) status only — see the comment
    // below for why the live negotiation state is not shown.
    [Command("dave"), Description("Print DAVE encryption status.")]
    public async Task DaveAsync(CommandContext ctx)
    {
        var voice = ctx.Client.GetVoice();
        var conn = voice.GetConnection(ctx.Guild);
        if (conn is null)
        {
            await ctx.RespondAsync("Not connected.");
            return;
        }
        // VoiceConnection doesn't expose a public IsDaveActive property yet,
        // so we report configuration intent.
        await ctx.RespondAsync(
            "**DAVE status:**\n" +
            "• DAVE is enabled when `MaxDaveProtocolVersion = 1` (current config).\n" +
            "• Whether the server negotiated DAVE for this session is visible in the Debug logs " +
            "(look for `DAVE_VERSION` gateway opcode).\n" +
            "• If libdave was not found at startup, DAVE is automatically disabled and a " +
            "`LogError` was emitted.");
    }
// ── !ping ──────────────────────────────────
[Command("ping"), Description("Print voice gateway and UDP latency.")]
public async Task PingAsync(CommandContext ctx)
{
var voice = ctx.Client.GetVoice();
var conn = voice.GetConnection(ctx.Guild);
if (conn is null)
{
await ctx.RespondAsync("Not connected.");
return;
}
await ctx.RespondAsync(
$"🏓 **WebSocket ping:** {conn.WebSocketPing} ms\n" +
$"📡 **UDP ping:** {conn.UdpPing} ms");
}
// ── !vdebug [on|off|status] ────────────────
[Command("vdebug"), Description("Toggle or show voice debug logging for this active voice connection.")]
public async Task VoiceDebugAsync(
CommandContext ctx,
[Description("on/off/status (default status).")] string mode = "status")
{
var voice = ctx.Client.GetVoice();
var conn = voice.GetConnection(ctx.Guild);
if (conn is null)
{
await ctx.RespondAsync("Not connected.");
return;
}
if (string.Equals(mode, "status", StringComparison.OrdinalIgnoreCase))
{
await ctx.RespondAsync($"Voice debug logging is currently **{(conn.EnableDebugLogging ? "ON" : "OFF")}** for this connection.");
return;
}
if (string.Equals(mode, "on", StringComparison.OrdinalIgnoreCase))
{
conn.EnableDebugLogging = true;
await ctx.RespondAsync("✅ Voice debug logging enabled for this connection.");
return;
}
if (string.Equals(mode, "off", StringComparison.OrdinalIgnoreCase))
{
conn.EnableDebugLogging = false;
await ctx.RespondAsync("✅ Voice debug logging disabled for this connection.");
return;
}
await ctx.RespondAsync("Usage: `!vdebug on`, `!vdebug off`, or `!vdebug status`.");
}
    // ─────────────────────────────────────────
    // Event handlers
    // ─────────────────────────────────────────
    // Logs speaking-state transitions per SSRC.
    private static Task OnUserSpeaking(VoiceConnection conn, UserSpeakingEventArgs e)
    {
        Log.Logger.Debug(
            "[Voice] {User} is {State} (SSRC {Ssrc})",
            e.User?.Username ?? "unknown",
            e.Speaking is SpeakingFlags.NotSpeaking ? "silent" : "speaking",
            e.Ssrc);
        return Task.CompletedTask;
    }
    // Logs a user joining the voice channel.
    private static Task OnUserJoined(VoiceConnection conn, VoiceUserJoinEventArgs e)
    {
        Log.Logger.Information(
            "[Voice] {User} joined (SSRC {Ssrc})", e.User.Username, e.Ssrc);
        return Task.CompletedTask;
    }
    // Logs a user leaving the voice channel.
    private static Task OnUserLeft(VoiceConnection conn, VoiceUserLeaveEventArgs e)
    {
        Log.Logger.Information(
            "[Voice] {User} left (SSRC {Ssrc})", e.User.Username, e.Ssrc);
        return Task.CompletedTask;
    }
    // Logs voice WebSocket failures with the underlying exception.
    private static Task OnSocketError(VoiceConnection conn, SocketErrorEventArgs e)
    {
        Log.Logger.Error(e.Exception, "[Voice] WebSocket error");
        return Task.CompletedTask;
    }
    // Per-packet receive hook: always feeds the recorder, and — when !listen
    // is active — computes loudness/gap diagnostics and logs them.
    private static Task OnVoiceReceived(VoiceConnection conn, VoiceReceiveEventArgs e)
    {
        // Snapshot the recorder under the lock; TryWrite locks internally.
        ReceiveRecorder? recorder;
        lock (_recorderSync)
            recorder = _recorder;
        recorder?.TryWrite(e);
        if (!_listenActive)
            return Task.CompletedTask;
        var pcm = e.PcmData.Span;
        var sampleCount = pcm.Length / 2; // 16-bit little-endian samples
        long sumSquares = 0;
        var peak = 0;
        var nonTrivialSamples = 0;
        for (var i = 0; i + 1 < pcm.Length; i += 2)
        {
            var sample = BinaryPrimitives.ReadInt16LittleEndian(pcm.Slice(i, 2));
            var abs = Math.Abs((int)sample);
            if (abs > peak)
                peak = abs;
            // Ignore near-zero quantization noise for "activity" checks.
            if (abs > 64)
                nonTrivialSamples++;
            sumSquares += (long)sample * sample;
        }
        // Normalized RMS (0..1) and fraction of samples above the noise floor.
        var rms = sampleCount > 0
            ? Math.Sqrt(sumSquares / (double)sampleCount) / short.MaxValue
            : 0d;
        var activityRatio = sampleCount > 0
            ? nonTrivialSamples / (double)sampleCount
            : 0d;
        // A "usable" decoded frame should carry more than just tiny dithering/noise.
        var usable = sampleCount > 0 && (rms >= 0.0025 || activityRatio >= 0.01);
        var diag = _receiveDiagnostics.GetOrAdd(e.Ssrc, static _ => new());
        var now = DateTimeOffset.UtcNow;
        var missingFrames = 0;
        lock (diag.Sync)
        {
            if (diag.LastPacketAt is { } lastPacketAt)
            {
                // Wall-clock gap minus one expected frame duration; >= 1.5
                // frame durations is counted as loss. Math.Max guards the
                // division against a zero AudioDuration.
                var gapMs = (now - lastPacketAt).TotalMilliseconds - e.AudioDuration;
                if (gapMs >= e.AudioDuration * 1.5)
                    missingFrames = (int)Math.Floor(gapMs / Math.Max(1, e.AudioDuration));
            }
            diag.LastPacketAt = now;
            diag.TotalFrames++;
            if (usable)
                diag.UsableFrames++;
            if (missingFrames > 0)
                diag.MissingFrames += missingFrames;
        }
        Log.Logger.Debug(
            "[Receive] SSRC={Ssrc} User={User} usable={Usable} rms={Rms:0.0000} active={Active:0.0%} peak={Peak} dur={Duration}ms PCM={PcmBytes}B Opus={OpusBytes}B missingFrames={MissingFrames}",
            e.Ssrc,
            e.User?.Username ?? "unknown",
            usable,
            rms,
            activityRatio,
            peak,
            e.AudioDuration,
            e.PcmData.Length,
            e.OpusData.Length,
            missingFrames);
        // Emit a rollup line every 50 frames per SSRC.
        lock (diag.Sync)
        {
            if (diag.TotalFrames % 50 == 0)
            {
                var usableRatio = diag.TotalFrames > 0
                    ? diag.UsableFrames / (double)diag.TotalFrames
                    : 0d;
                Log.Logger.Information(
                    "[ReceiveSummary] SSRC={Ssrc} frames={Frames} usable={Usable:0.0%} missingFrames={MissingFrames}",
                    e.Ssrc, diag.TotalFrames, usableRatio, diag.MissingFrames);
            }
        }
        return Task.CompletedTask;
    }
// ─────────────────────────────────────────
// Helpers
// ─────────────────────────────────────────
private static void StopFfmpeg()
{
_playCts?.Cancel();
_playCts = null;
if (_ffmpeg is not null && !_ffmpeg.HasExited)
{
try { _ffmpeg.Kill(); } catch { /* already gone */ }
}
_ffmpeg = null;
}
private static RecordingSummary? StopRecorder()
{
ReceiveRecorder? recorder;
lock (_recorderSync)
{
recorder = _recorder;
_recorder = null;
}
if (recorder is null)
return null;
return recorder.Stop();
}
private static bool TryParseRecordingFormat(string value, out RecordingFormat format)
{
if (string.Equals(value, "wav", StringComparison.OrdinalIgnoreCase))
{
format = RecordingFormat.Wav;
return true;
}
if (string.Equals(value, "mp3", StringComparison.OrdinalIgnoreCase))
{
format = RecordingFormat.Mp3;
return true;
}
format = default;
return false;
}
    // Per-SSRC receive statistics used by OnVoiceReceived's !listen output.
    private sealed class ReceiveDiagnostics
    {
        public Lock Sync { get; } = new(); // guards all counters below
        public DateTimeOffset? LastPacketAt { get; set; }
        public int TotalFrames { get; set; }
        public int UsableFrames { get; set; } // frames above the noise heuristic
        public int MissingFrames { get; set; } // estimated from wall-clock gaps
    }
    // Output container requested via !recstart.
    private enum RecordingFormat
    {
        Wav,
        Mp3
    }
    // Point-in-time view of a running recording (!recstatus).
    private sealed class RecordingSnapshot
    {
        public required string TargetPath { get; init; }
        public required int ParticipantCount { get; init; }
        public required int FrameCount { get; init; }
        public required int InsertedSilenceFrames { get; init; }
        public required long PcmBytes { get; init; }
    }
    // Final result of a recording (!recstop).
    private sealed class RecordingSummary
    {
        public required string OutputPath { get; init; }
        public required string? StemDirectory { get; init; } // null when no tracks were captured
        public required int ParticipantCount { get; init; }
        public required int FrameCount { get; init; }
        public required int InsertedSilenceFrames { get; init; }
        public required long PcmBytes { get; init; }
    }
    // Buffers per-SSRC incoming PCM, then on Stop() mixes everything into one
    // WAV (optionally transcoded to MP3) plus per-participant "stem" WAVs.
    // All mutable state is guarded by _sync.
    private sealed class ReceiveRecorder
    {
        private readonly Lock _sync = new();
        private readonly string _wavPath; // mixdown WAV, always written first
        private readonly string _stemsDirectory; // "<base>_stems" for per-user WAVs
        private readonly RecordingFormat _format;
        private readonly FileStream _wavStream;
        private readonly Dictionary<uint, ParticipantTrack> _tracks = [];
        private readonly HashSet<uint> _participantSsrcs = [];
        private readonly DateTimeOffset _startedAt = DateTimeOffset.UtcNow;
        private bool _stopped;
        // Audio format/frame geometry is latched from the first received frame.
        private AudioFormat _audioFormat;
        private bool _audioFormatSet;
        private DateTimeOffset? _lastPacketAt;
        private int _frameDurationMs = 20; // default until the first packet arrives
        private int _frameByteLength;
        private byte[] _silenceFrameBuffer = []; // one zeroed frame, sized on first packet
        private int _frameCount;
        private int _insertedSilenceFrames;
        private long _pcmBytes;
        // One buffered PCM timeline per SSRC.
        private sealed class ParticipantTrack
        {
            public MemoryStream Pcm { get; set; } = new();
            public string Label { get; init; } = "unknown";
            public int StartOffsetFrames { get; set; } // silence frames owed before first packet
            public int FramesWritten { get; set; }
        }
        // Creates the mixdown WAV immediately with a placeholder header so
        // mixed PCM can be appended at Stop(). TargetPath reflects the
        // requested container; MP3 is only produced at Stop() via ffmpeg.
        public ReceiveRecorder(string basePath, RecordingFormat format)
        {
            _format = format;
            _wavPath = basePath + ".wav";
            _stemsDirectory = basePath + "_stems";
            this.TargetPath = format == RecordingFormat.Mp3 ? basePath + ".mp3" : _wavPath;
            _wavStream = new FileStream(_wavPath, FileMode.Create, FileAccess.ReadWrite, FileShare.Read);
            WriteWavHeaderPlaceholder(_wavStream);
        }
        // Intended output path (WAV, or MP3 when transcoding was requested).
        public string TargetPath { get; }
        // Appends one received frame to the sender's track. The first frame
        // latches the audio format and frame size; later frames that don't
        // match are dropped with a warning. No-op after Stop().
        public void TryWrite(VoiceReceiveEventArgs e)
        {
            lock (_sync)
            {
                if (_stopped || e.PcmData.Length <= 0)
                    return;
                var now = DateTimeOffset.UtcNow;
                _participantSsrcs.Add(e.Ssrc);
                if (!_audioFormatSet)
                {
                    // First frame wins: it defines format, frame duration,
                    // frame byte length, and the zeroed silence buffer.
                    _audioFormat = e.AudioFormat;
                    _audioFormatSet = true;
                    _frameDurationMs = Math.Max(1, e.AudioDuration);
                    _frameByteLength = e.PcmData.Length;
                    _silenceFrameBuffer = new byte[e.PcmData.Length];
                    Log.Logger.Information(
                        "[Recorder] Multi-user track capture initialized from SSRC={Ssrc} User={User} Format={Rate}Hz/{Channels}ch frame={Duration}ms",
                        e.Ssrc,
                        e.User?.Username ?? "unknown",
                        e.AudioFormat.SampleRate,
                        e.AudioFormat.ChannelCount,
                        _frameDurationMs);
                }
                // _audioFormatSet is always true by this point; the extra
                // check is redundant but harmless.
                if (_audioFormatSet &&
                    (_audioFormat.SampleRate != e.AudioFormat.SampleRate || _audioFormat.ChannelCount != e.AudioFormat.ChannelCount))
                {
                    Log.Logger.Warning(
                        "[Recorder] Ignoring frame with format mismatch for SSRC={Ssrc} User={User}: got {Rate}Hz/{Channels}ch, expected {ExpectedRate}Hz/{ExpectedChannels}ch",
                        e.Ssrc,
                        e.User?.Username ?? "unknown",
                        e.AudioFormat.SampleRate,
                        e.AudioFormat.ChannelCount,
                        _audioFormat.SampleRate,
                        _audioFormat.ChannelCount);
                    return;
                }
                if (e.PcmData.Length != _frameByteLength)
                {
                    Log.Logger.Warning(
                        "[Recorder] Ignoring frame with size mismatch for SSRC={Ssrc} User={User}: got {Bytes} bytes, expected {ExpectedBytes} bytes",
                        e.Ssrc,
                        e.User?.Username ?? "unknown",
                        e.PcmData.Length,
                        _frameByteLength);
                    return;
                }
                if (!_tracks.TryGetValue(e.Ssrc, out var track))
                {
                    // New participant: offset their track by the silence
                    // elapsed since the recorder started so timelines align.
                    track = new ParticipantTrack
                    {
                        Label = SanitizeFileName(e.User?.Username ?? $"ssrc_{e.Ssrc}"),
                        StartOffsetFrames = GetLeadingFrames(now)
                    };
                    _tracks[e.Ssrc] = track;
                }
                track.Pcm.Write(e.PcmData.Span);
                track.FramesWritten++;
                _lastPacketAt = now;
            }
        }
        // Thread-safe point-in-time statistics for !recstatus.
        public RecordingSnapshot GetSnapshot()
        {
            lock (_sync)
            {
                return new RecordingSnapshot
                {
                    TargetPath = this.TargetPath,
                    ParticipantCount = _participantSsrcs.Count,
                    FrameCount = GetCurrentFrameCount(),
                    InsertedSilenceFrames = _insertedSilenceFrames,
                    PcmBytes = _pcmBytes
                };
            }
        }
        // Finalizes the recording: exports stems, aligns tracks, mixes into
        // the WAV, patches the header, and optionally transcodes to MP3.
        // Idempotent — a repeat call returns a summary without re-finalizing.
        public RecordingSummary Stop()
        {
            lock (_sync)
            {
                if (_stopped)
                {
                    // NOTE(review): if the first Stop() fell back to WAV after
                    // a failed transcode, this replay still reports the MP3
                    // TargetPath — confirm acceptable for a test bot.
                    return new RecordingSummary
                    {
                        OutputPath = this.TargetPath,
                        StemDirectory = _tracks.Count > 0 ? _stemsDirectory : null,
                        ParticipantCount = _participantSsrcs.Count,
                        FrameCount = _frameCount,
                        InsertedSilenceFrames = _insertedSilenceFrames,
                        PcmBytes = _pcmBytes
                    };
                }
                _stopped = true;
                if (_audioFormatSet)
                {
                    var finalFrameCount = GetCurrentFrameCount();
                    // Extend the timeline with trailing silence if recording
                    // continued past the last received packet.
                    if (_lastPacketAt is { } lastPacketAt)
                    {
                        var trailingMs = (DateTimeOffset.UtcNow - lastPacketAt).TotalMilliseconds;
                        if (trailingMs >= _frameDurationMs * 1.5)
                            finalFrameCount += (int)Math.Floor(trailingMs / _frameDurationMs);
                    }
                    // Stems are exported before AlignTracks pads them, so they
                    // contain only the audio actually received per participant.
                    ExportStemFiles();
                    AlignTracks(finalFrameCount);
                    MixTracksToWav(finalFrameCount);
                }
                FinalizeWavFile(_wavStream, _pcmBytes);
                _wavStream.Dispose();
                var outputPath = this.TargetPath;
                if (_format == RecordingFormat.Mp3)
                {
                    // Fall back to the WAV when ffmpeg transcoding fails.
                    if (!TranscodeToMp3(_wavPath, outputPath))
                    {
                        outputPath = _wavPath;
                        Log.Logger.Warning("[Recorder] MP3 transcoding failed. Keeping WAV output at {Path}", outputPath);
                    }
                }
                var durationSec = (DateTimeOffset.UtcNow - _startedAt).TotalSeconds;
                Log.Logger.Information(
                    "[Recorder] Finalized {Path} ({Participants} participants, {Frames} frames, {SilenceFrames} inserted silence frames, {Bytes} PCM bytes, {Duration:0.0}s)",
                    outputPath, _participantSsrcs.Count, _frameCount, _insertedSilenceFrames, _pcmBytes, durationSec);
                return new RecordingSummary
                {
                    OutputPath = outputPath,
                    StemDirectory = _tracks.Count > 0 ? _stemsDirectory : null,
                    ParticipantCount = _participantSsrcs.Count,
                    FrameCount = _frameCount,
                    InsertedSilenceFrames = _insertedSilenceFrames,
                    PcmBytes = _pcmBytes
                };
            }
        }
private int GetLeadingFrames(DateTimeOffset firstPacketAt)
{
var leadingMs = (firstPacketAt - _startedAt).TotalMilliseconds - _frameDurationMs;
if (leadingMs < _frameDurationMs * 1.5)
return 0;
return (int)Math.Floor(leadingMs / _frameDurationMs);
}
private int GetCurrentFrameCount()
{
if (_tracks.Count == 0)
return 0;
return _tracks.Values.Max(track => track.StartOffsetFrames + track.FramesWritten);
}
        // Pads every track to exactly frameCount frames: leading silence for
        // late joiners (StartOffsetFrames) plus trailing silence up to the
        // common timeline. Afterwards all tracks are the same byte length.
        private void AlignTracks(int frameCount)
        {
            foreach (var track in _tracks.Values)
            {
                if (track.StartOffsetFrames > 0)
                {
                    // Rebuild the stream with leading silence prepended.
                    var aligned = new MemoryStream();
                    WriteSilenceFrames(aligned, track.StartOffsetFrames);
                    track.Pcm.Position = 0;
                    track.Pcm.CopyTo(aligned);
                    track.Pcm.Dispose();
                    track.Pcm = aligned;
                    track.FramesWritten += track.StartOffsetFrames;
                    _insertedSilenceFrames += track.StartOffsetFrames;
                    track.StartOffsetFrames = 0;
                }
                var missing = frameCount - track.FramesWritten;
                if (missing <= 0)
                    continue;
                WriteSilenceFrames(track.Pcm, missing);
                track.FramesWritten += missing;
                _insertedSilenceFrames += missing;
            }
        }
        // Mixes all aligned tracks frame-by-frame into the mixdown stream.
        // Each sample is averaged over the tracks "active" (above the noise
        // floor) at that sample, scaled slightly for headroom, then clamped.
        private void MixTracksToWav(int frameCount)
        {
            if (_tracks.Count == 0 || _frameByteLength <= 0)
                return;
            var sampleCount = _frameByteLength / sizeof(short);
            var mixedSamples = new int[sampleCount]; // per-sample sum across tracks
            var activeContributors = new int[sampleCount]; // per-sample count of loud tracks
            var mixedFrame = new byte[_frameByteLength];
            var readers = new (MemoryStream Stream, byte[] Buffer)[_tracks.Count];
            var idx = 0;
            foreach (var track in _tracks.Values)
            {
                track.Pcm.Position = 0;
                readers[idx++] = (track.Pcm, new byte[_frameByteLength]);
            }
            for (var frame = 0; frame < frameCount; frame++)
            {
                Array.Clear(mixedSamples, 0, mixedSamples.Length);
                Array.Clear(activeContributors, 0, activeContributors.Length);
                for (var r = 0; r < readers.Length; r++)
                {
                    var reader = readers[r];
                    var read = reader.Stream.Read(reader.Buffer, 0, reader.Buffer.Length);
                    if (read <= 0)
                        continue;
                    // Zero-fill a short tail read so stale bytes don't leak in.
                    if (read < reader.Buffer.Length)
                        Array.Clear(reader.Buffer, read, reader.Buffer.Length - read);
                    var span = reader.Buffer.AsSpan();
                    for (var i = 0; i < sampleCount; i++)
                    {
                        var sample = BinaryPrimitives.ReadInt16LittleEndian(span.Slice(i * sizeof(short), sizeof(short)));
                        mixedSamples[i] += sample;
                        // Same |sample| > 64 noise floor as the !listen heuristics.
                        if (Math.Abs(sample) > 64)
                            activeContributors[i]++;
                    }
                }
                var output = mixedFrame.AsSpan();
                for (var i = 0; i < sampleCount; i++)
                {
                    // Average over active speakers only (min 1 avoids division
                    // by zero), leave 2% headroom, clamp to 16-bit range.
                    var contributors = Math.Max(1, activeContributors[i]);
                    var mixed = (mixedSamples[i] / (double)contributors) * 0.98;
                    var clamped = Math.Clamp((int)Math.Round(mixed), short.MinValue, short.MaxValue);
                    BinaryPrimitives.WriteInt16LittleEndian(output.Slice(i * sizeof(short), sizeof(short)), (short)clamped);
                }
                _wavStream.Write(mixedFrame);
                _pcmBytes += mixedFrame.Length;
                _frameCount++;
            }
        }
private void ExportStemFiles()
{
Directory.CreateDirectory(_stemsDirectory);
foreach (var (ssrc, track) in _tracks)
{
var stemPath = Path.Combine(_stemsDirectory, $"{ssrc}_{track.Label}.wav");
using var stemStream = new FileStream(stemPath, FileMode.Create, FileAccess.ReadWrite, FileShare.Read);
WriteWavHeaderPlaceholder(stemStream);
track.Pcm.Position = 0;
track.Pcm.CopyTo(stemStream);
FinalizeWavFile(stemStream, track.Pcm.Length);
}
}
private void WriteSilenceFrames(Stream stream, int frameCount)
{
if (frameCount <= 0 || _silenceFrameBuffer.Length == 0)
return;
for (var i = 0; i < frameCount; i++)
stream.Write(_silenceFrameBuffer);
}
private static bool TranscodeToMp3(string wavPath, string mp3Path)
{
try
{
using var ffmpeg = Process.Start(new ProcessStartInfo
{
FileName = "ffmpeg",
Arguments = $"-y -hide_banner -loglevel error -i \"{wavPath}\" -codec:a libmp3lame -q:a 2 \"{mp3Path}\"",
RedirectStandardError = true,
UseShellExecute = false
});
if (ffmpeg is null)
return false;
var stderr = ffmpeg.StandardError.ReadToEnd();
ffmpeg.WaitForExit();
if (ffmpeg.ExitCode != 0)
{
Log.Logger.Error("[Recorder] ffmpeg failed with exit code {ExitCode}: {Stderr}", ffmpeg.ExitCode, stderr);
return false;
}
if (File.Exists(mp3Path))
File.Delete(wavPath);
return true;
}
catch (Exception ex)
{
Log.Logger.Error(ex, "[Recorder] Exception while transcoding WAV to MP3");
return false;
}
}
private static string SanitizeFileName(string value)
{
var chars = value.ToCharArray();
for (var i = 0; i < chars.Length; i++)
{
if (Array.IndexOf(Path.GetInvalidFileNameChars(), chars[i]) >= 0)
chars[i] = '_';
}
var sanitized = new string(chars).Trim();
return string.IsNullOrWhiteSpace(sanitized) ? "unknown" : sanitized;
}
private static void WriteWavHeaderPlaceholder(Stream stream)
{
Span<byte> header = stackalloc byte[44];
header.Clear();
"RIFF"u8.CopyTo(header.Slice(0, 4));
"WAVE"u8.CopyTo(header.Slice(8, 4));
"fmt "u8.CopyTo(header.Slice(12, 4));
BinaryPrimitives.WriteInt32LittleEndian(header.Slice(16, 4), 16);
BinaryPrimitives.WriteInt16LittleEndian(header.Slice(20, 2), 1);
BinaryPrimitives.WriteInt16LittleEndian(header.Slice(34, 2), 16);
"data"u8.CopyTo(header.Slice(36, 4));
stream.Write(header);
}
        // Rewrites the 44-byte PCM WAV header with the real sizes and the
        // latched audio format, then flushes. Used for the mixdown and stems.
        private void FinalizeWavFile(Stream stream, long pcmBytes)
        {
            // If nothing was ever received, fall back to the library default.
            if (!_audioFormatSet)
                _audioFormat = AudioFormat.Default;
            Span<byte> header = stackalloc byte[44];
            header.Clear();
            "RIFF"u8.CopyTo(header.Slice(0, 4));
            // NOTE(review): the (int) casts truncate past ~2 GB of PCM — the
            // 32-bit RIFF format caps there anyway; confirm acceptable.
            BinaryPrimitives.WriteInt32LittleEndian(header.Slice(4, 4), (int)(36 + pcmBytes));
            "WAVE"u8.CopyTo(header.Slice(8, 4));
            "fmt "u8.CopyTo(header.Slice(12, 4));
            BinaryPrimitives.WriteInt32LittleEndian(header.Slice(16, 4), 16); // fmt chunk size
            BinaryPrimitives.WriteInt16LittleEndian(header.Slice(20, 2), 1); // PCM
            BinaryPrimitives.WriteInt16LittleEndian(header.Slice(22, 2), (short)_audioFormat.ChannelCount);
            BinaryPrimitives.WriteInt32LittleEndian(header.Slice(24, 4), _audioFormat.SampleRate);
            var byteRate = _audioFormat.SampleRate * _audioFormat.ChannelCount * sizeof(short);
            BinaryPrimitives.WriteInt32LittleEndian(header.Slice(28, 4), byteRate);
            BinaryPrimitives.WriteInt16LittleEndian(header.Slice(32, 2), (short)(_audioFormat.ChannelCount * sizeof(short))); // block align
            BinaryPrimitives.WriteInt16LittleEndian(header.Slice(34, 2), 16); // bits per sample
            "data"u8.CopyTo(header.Slice(36, 4));
            BinaryPrimitives.WriteInt32LittleEndian(header.Slice(40, 4), (int)pcmBytes);
            stream.Seek(0, SeekOrigin.Begin);
            stream.Write(header);
            stream.Flush();
        }
}
}
<Project Sdk="Microsoft.NET.Sdk">
  <!-- Console host for VoiceTestBot.cs (net10.0, nullable + implicit usings). -->
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <!-- NuGet vulnerability auditing disabled — acceptable for a throwaway test bot only. -->
    <NuGetAuditSuppress>true</NuGetAuditSuppress>
    <NuGetAudit>false</NuGetAudit>
  </PropertyGroup>
  <ItemGroup>
    <!-- NOTE(review): the source header lists DisCatSharp.Voice.Natives as a
         required package, but it is not referenced here — confirm whether
         DisCatSharp.Voice pulls it in transitively. -->
    <PackageReference Include="DisCatSharp" Version="10.7.0-nightly-069" />
    <PackageReference Include="DisCatSharp.CommandsNext" Version="10.7.0-nightly-069" />
    <PackageReference Include="DisCatSharp.Voice" Version="10.7.0-nightly-069" />
    <PackageReference Include="Microsoft.Extensions.DependencyInjection" Version="10.0.3" />
    <PackageReference Include="Microsoft.Extensions.Logging" Version="10.0.3" />
    <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.3" />
    <PackageReference Include="Serilog" Version="4.3.1" />
    <PackageReference Include="Serilog.Extensions.Logging" Version="10.0.0" />
    <PackageReference Include="Serilog.Sinks.Console" Version="6.1.1" />
    <PackageReference Include="Serilog.Sinks.Debug" Version="3.0.0" />
    <PackageReference Include="Serilog.Sinks.File" Version="7.0.0" />
    <PackageReference Include="System.Memory" Version="4.6.3" />
  </ItemGroup>
</Project>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment