Last active
March 8, 2026 23:13
-
-
Save Lulalaby/16ffc1263a7e60e438a11978e062f3a9 to your computer and use it in GitHub Desktop.
VoiceTestBot
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // VoiceTestBot.cs | |
| // Drop this file into a console app that references DisCatSharp, DisCatSharp.Voice, | |
| // DisCatSharp.Voice.Natives, and DisCatSharp.CommandsNext. | |
| // | |
| // Required NuGet packages: | |
| // DisCatSharp | |
| // DisCatSharp.Voice | |
| // DisCatSharp.Voice.Natives | |
| // DisCatSharp.CommandsNext | |
| // | |
| // Set environment variable DISCORD_TOKEN before running. | |
| // Optional: set DISCORD_PREFIX (default: "!") | |
| // | |
| // Available commands: | |
| // !join [channel] — join caller's voice channel or an explicit voice/stage channel | |
| // !leave — disconnect from voice | |
| // !play <path> — stream an audio file through ffmpeg (requires ffmpeg in PATH) | |
| // !stop — stop playback | |
| // !pause — pause playback | |
| // !resume — resume playback | |
| // !listen — toggle voice receive (dumps PCM packet info to console) | |
| // !recstart [fmt] — start recording incoming audio (fmt: wav/mp3) | |
| // !recstop — stop recording and finalize output file | |
| // !recstatus — print recording status | |
| // !dave — print current DAVE encryption status | |
| // !vdebug [mode] — voice debug logs for current connection (mode: on/off/status) | |
| // !ping — print WebSocket + UDP latency | |
| using System.Diagnostics; | |
| using System.Collections.Concurrent; | |
| using System.Buffers.Binary; | |
| using System.Threading; | |
| using DisCatSharp; | |
| using DisCatSharp.CommandsNext; | |
| using DisCatSharp.CommandsNext.Attributes; | |
| using DisCatSharp.Entities; | |
| using DisCatSharp.Enums; | |
| using DisCatSharp.EventArgs; | |
| using DisCatSharp.Voice; | |
| using DisCatSharp.Voice.EventArgs; | |
| using Microsoft.Extensions.Logging; | |
| using Serilog; | |
// ─────────────────────────────────────────────
// Entry point
// ─────────────────────────────────────────────

// Serilog goes up first so everything below logs through it.
var logsDirectory = Path.Combine(Environment.CurrentDirectory, "logs");
Directory.CreateDirectory(logsDirectory);
var logFilePath = Path.Combine(logsDirectory, "voice_test_bot.log");

// One shared template for all three sinks.
const string outputTemplate = "[{Timestamp:HH:mm:ss} {Level:u3}] {SourceContext} - {Message:lj}{NewLine}{Exception}";
Log.Logger = new LoggerConfiguration()
    .MinimumLevel.Verbose()
    .WriteTo.Console(outputTemplate: outputTemplate)
    .WriteTo.Debug(outputTemplate: outputTemplate)
    .WriteTo.File(logFilePath, rollingInterval: RollingInterval.Hour, outputTemplate: outputTemplate)
    .CreateLogger();

// Configuration comes from the environment; the token is mandatory.
var botToken = Environment.GetEnvironmentVariable("DISCORD_TOKEN") ?? throw new InvalidOperationException("DISCORD_TOKEN not set");
var commandPrefix = Environment.GetEnvironmentVariable("DISCORD_PREFIX") ?? "!";

var discord = new DiscordClient(new DiscordConfiguration
{
    Token = botToken,
    TokenType = TokenType.Bot,
    Intents = DiscordIntents.AllUnprivileged | DiscordIntents.MessageContent,
    MinimumLogLevel = LogLevel.Trace,
    LoggerFactory = new Serilog.Extensions.Logging.SerilogLoggerFactory(Log.Logger, dispose: false),
});

// ── Voice ──────────────────────────────────────
discord.UseVoice(new VoiceConfiguration
{
    EnableIncoming = true, // receive incoming audio
    MaxDaveProtocolVersion = 1, // enable DAVE E2EE (set to 0 to test without DAVE)
    EnableDebugLogging = true, // toggles all debug/trace logs inside DisCatSharp.Voice
    DavePendingAudioBehavior = DavePendingAudioBehavior.PassThrough // set Drop to block send until DAVE is Active
});

// ── Commands ───────────────────────────────────
var commandsNext = discord.UseCommandsNext(new CommandsNextConfiguration
{
    StringPrefixes = [commandPrefix],
    EnableDms = false,
});
commandsNext.RegisterCommands<VoiceCommands>();

// ── Global voice diagnostics ───────────────────
discord.Ready += (c, e) =>
{
    c.Logger.LogInformation("[Bot] Ready as {User}", c.CurrentUser.Username);
    return Task.CompletedTask;
};

await discord.ConnectAsync();
// Park the main "method" forever; the bot runs on event-loop callbacks.
await Task.Delay(Timeout.Infinite);
| // ───────────────────────────────────────────── | |
| // Voice command module | |
| // ───────────────────────────────────────────── | |
| public sealed class VoiceCommands : BaseCommandModule | |
| { | |
    // Track whether receive logging is active per guild.
    // NOTE(review): despite the comment above, this is one process-wide flag,
    // not per-guild state — acceptable for a single-guild test bot.
    private static bool _listenActive;
    // Per-SSRC receive statistics (frame/gap counters) maintained by OnVoiceReceived.
    private static readonly ConcurrentDictionary<uint, ReceiveDiagnostics> _receiveDiagnostics = new();
    // Guards _recorder; at most one recording session exists at a time.
    private static readonly Lock _recorderSync = new();
    private static ReceiveRecorder? _recorder;
    // Track the current ffmpeg process per guild (single-guild test bot).
    private static Process? _ffmpeg;
    // Cancels the active playback loop started by !play (see StreamFileAsync).
    private static CancellationTokenSource? _playCts;
| // ── !join ────────────────────────────────── | |
| [Command("join"), Description("Join your voice channel, or provide a target voice/stage channel.")] | |
| public async Task JoinAsync(CommandContext ctx, [Description("Optional target channel mention/id/name.")] DiscordChannel? channel = null) | |
| { | |
| var targetChannel = channel ?? ctx.Member?.VoiceState?.Channel; | |
| if (targetChannel is null) | |
| { | |
| await ctx.RespondAsync("You are not in a voice channel. Use `!join <voice-channel>` to pick one explicitly."); | |
| return; | |
| } | |
| if (targetChannel.Type is not (ChannelType.Voice or ChannelType.Stage)) | |
| { | |
| await ctx.RespondAsync($"`{targetChannel.Name}` is not a voice or stage channel."); | |
| return; | |
| } | |
| var voice = ctx.Client.GetVoice(); | |
| var conn = await voice.ConnectAsync(targetChannel); | |
| // ── Wire per-connection events ────────── | |
| conn.UserSpeaking += OnUserSpeaking; | |
| conn.UserJoined += OnUserJoined; | |
| conn.UserLeft += OnUserLeft; | |
| conn.VoiceSocketErrored += OnSocketError; | |
| conn.VoiceReceived += OnVoiceReceived; | |
| ctx.Client.Logger.LogInformation( | |
| "[Voice] Connected to {Channel} in {Guild}", targetChannel.Name, targetChannel.Guild.Name); | |
| await ctx.RespondAsync($"✅ Joined **{targetChannel.Name}**. DAVE is negotiated automatically — check logs for encryption status."); | |
| } | |
| // ── !leave ───────────────────────────────── | |
| [Command("leave"), Description("Disconnect from voice.")] | |
| public async Task LeaveAsync(CommandContext ctx) | |
| { | |
| var voice = ctx.Client.GetVoice(); | |
| var conn = voice.GetConnection(ctx.Guild); | |
| if (conn is null) | |
| { | |
| await ctx.RespondAsync("Not connected."); | |
| return; | |
| } | |
| StopFfmpeg(); | |
| StopRecorder(); | |
| conn.Disconnect(); | |
| await ctx.RespondAsync("👋 Disconnected."); | |
| } | |
| // ── !play <path> ─────────────────────────── | |
| [Command("play"), Description("Play an audio file (requires ffmpeg in PATH).")] | |
| public async Task PlayAsync(CommandContext ctx, [RemainingText, Description("Path or URL to audio file.")] string path) | |
| { | |
| var voice = ctx.Client.GetVoice(); | |
| var conn = voice.GetConnection(ctx.Guild); | |
| if (conn is null) | |
| { | |
| await ctx.RespondAsync("Not connected — use `!join` first."); | |
| return; | |
| } | |
| if (!TryResolveAudioPath(path, out var resolvedPath)) | |
| { | |
| await ctx.RespondAsync( | |
| "File not found.\n" + | |
| $"• input: `{path}`\n" + | |
| $"• cwd: `{Environment.CurrentDirectory}`\n" + | |
| $"• base: `{AppContext.BaseDirectory}`"); | |
| return; | |
| } | |
| StopFfmpeg(); | |
| _playCts = new CancellationTokenSource(); | |
| await ctx.RespondAsync($"▶️ Playing `{Path.GetFileName(resolvedPath)}`…"); | |
| // Fire-and-forget so the command returns immediately. | |
| _ = Task.Run(() => StreamFileAsync(conn, resolvedPath, _playCts.Token), _playCts.Token); | |
| } | |
| private static bool TryResolveAudioPath(string path, out string resolvedPath) | |
| { | |
| resolvedPath = path; | |
| if (string.IsNullOrWhiteSpace(path)) | |
| return false; | |
| // Normalize accidental shell-style quotes from command input. | |
| var trimmed = path.Trim().Trim('"'); | |
| if (Path.IsPathRooted(trimmed)) | |
| { | |
| if (File.Exists(trimmed)) | |
| { | |
| resolvedPath = trimmed; | |
| return true; | |
| } | |
| return false; | |
| } | |
| var candidates = new[] | |
| { | |
| trimmed, | |
| Path.Combine(Environment.CurrentDirectory, trimmed), | |
| Path.Combine(AppContext.BaseDirectory, trimmed) | |
| }; | |
| foreach (var candidate in candidates) | |
| { | |
| if (!File.Exists(candidate)) | |
| continue; | |
| resolvedPath = candidate; | |
| return true; | |
| } | |
| return false; | |
| } | |
| private static async Task StreamFileAsync(VoiceConnection conn, string path, CancellationToken ct) | |
| { | |
| var transmit = conn.GetTransmitSink(); | |
| var log = Log.Logger; | |
| _ffmpeg = Process.Start(new ProcessStartInfo | |
| { | |
| FileName = "ffmpeg", | |
| Arguments = $"-hide_banner -loglevel error -i \"{path}\" -ac 2 -f s16le -ar 48000 pipe:1", | |
| RedirectStandardOutput = true, | |
| RedirectStandardError = true, | |
| UseShellExecute = false, | |
| })!; | |
| // Drain ffmpeg stderr asynchronously so it never blocks the audio pipe. | |
| var localFfmpeg = _ffmpeg; | |
| _ = Task.Run(async () => | |
| { | |
| string? line; | |
| while ((line = await localFfmpeg.StandardError.ReadLineAsync().ConfigureAwait(false)) is not null) | |
| log.Error("[VoiceTest] ffmpeg stderr: {Line}", line); | |
| }, CancellationToken.None); | |
| try | |
| { | |
| await conn.SendSpeakingAsync(SpeakingFlags.Microphone); | |
| var pcm = _ffmpeg.StandardOutput.BaseStream; | |
| var buf = new byte[transmit.SampleLength]; | |
| int read; | |
| var totalBytes = 0; | |
| while (!ct.IsCancellationRequested && | |
| (read = await pcm.ReadAsync(buf.AsMemory(0, buf.Length), ct)) > 0) | |
| { | |
| totalBytes += read; | |
| log.Debug("[VoiceTest] ffmpeg produced {Bytes} bytes of PCM (total: {Total})", read, totalBytes); | |
| await transmit.WriteAsync(buf.AsMemory(0, read), ct); | |
| } | |
| log.Information("[VoiceTest] PCM stream ended — {Total} bytes fed to transmit sink", totalBytes); | |
| await transmit.FlushAsync(ct); | |
| await conn.WaitForPlaybackFinishAsync(); | |
| } | |
| catch (OperationCanceledException) { /* stopped intentionally */ } | |
| catch (Exception ex) | |
| { | |
| log.Error(ex, "[VoiceTest] Exception in StreamFileAsync for path={Path}", path); | |
| } | |
| finally | |
| { | |
| await conn.SendSpeakingAsync(SpeakingFlags.NotSpeaking); | |
| StopFfmpeg(); | |
| } | |
| } | |
| // ── !stop ────────────────────────────────── | |
| [Command("stop"), Description("Stop playback.")] | |
| public async Task StopAsync(CommandContext ctx) | |
| { | |
| StopFfmpeg(); | |
| await ctx.RespondAsync("⏹️ Stopped."); | |
| } | |
| // ── !pause ───────────────────────────────── | |
| [Command("pause"), Description("Pause playback.")] | |
| public async Task PauseAsync(CommandContext ctx) | |
| { | |
| var voice = ctx.Client.GetVoice(); | |
| var conn = voice.GetConnection(ctx.Guild); | |
| conn?.Pause(); | |
| await ctx.RespondAsync("⏸️ Paused."); | |
| } | |
| // ── !resume ──────────────────────────────── | |
| [Command("resume"), Description("Resume playback.")] | |
| public async Task ResumeAsync(CommandContext ctx) | |
| { | |
| var voice = ctx.Client.GetVoice(); | |
| var conn = voice.GetConnection(ctx.Guild); | |
| if (conn is not null) | |
| await conn.ResumeAsync(); | |
| await ctx.RespondAsync("▶️ Resumed."); | |
| } | |
| // ── !listen ──────────────────────────────── | |
| [Command("listen"), Description("Toggle receive logging (prints packet info to console).")] | |
| public async Task ListenAsync(CommandContext ctx) | |
| { | |
| _listenActive = !_listenActive; | |
| await ctx.RespondAsync(_listenActive | |
| ? "🎧 Receive logging **ON** — incoming packets will be printed to console." | |
| : "🔇 Receive logging **OFF**."); | |
| } | |
| // ── !recstart [wav|mp3] ─────────────────── | |
| [Command("recstart"), Description("Start recording incoming audio to logs/recordings (wav/mp3).")] | |
| public async Task RecStartAsync(CommandContext ctx, [Description("Output format: wav or mp3 (default wav).")] string format = "wav") | |
| { | |
| var voice = ctx.Client.GetVoice(); | |
| var conn = voice.GetConnection(ctx.Guild); | |
| if (conn is null) | |
| { | |
| await ctx.RespondAsync("Not connected — use `!join` first."); | |
| return; | |
| } | |
| if (!TryParseRecordingFormat(format, out var recordingFormat)) | |
| { | |
| await ctx.RespondAsync("Invalid format. Use `wav` or `mp3`."); | |
| return; | |
| } | |
| ReceiveRecorder recorder; | |
| var alreadyRunningName = string.Empty; | |
| lock (_recorderSync) | |
| { | |
| if (_recorder is not null) | |
| { | |
| alreadyRunningName = Path.GetFileName(_recorder.TargetPath); | |
| recorder = _recorder; | |
| } | |
| else | |
| { | |
| var recordingsDirectory = Path.Combine(Environment.CurrentDirectory, "logs", "recordings"); | |
| Directory.CreateDirectory(recordingsDirectory); | |
| var timestamp = DateTimeOffset.UtcNow.ToString("yyyyMMdd_HHmmss"); | |
| var basePath = Path.Combine(recordingsDirectory, $"voice_{ctx.Guild.Id}_{timestamp}"); | |
| recorder = new ReceiveRecorder(basePath, recordingFormat); | |
| _recorder = recorder; | |
| } | |
| } | |
| if (!string.IsNullOrEmpty(alreadyRunningName)) | |
| { | |
| await ctx.RespondAsync($"Recording already running: `{alreadyRunningName}`"); | |
| return; | |
| } | |
| _listenActive = true; | |
| await ctx.RespondAsync( | |
| $"⏺️ Recording started.\n" + | |
| $"• format: `{recordingFormat.ToString().ToLowerInvariant()}`\n" + | |
| $"• output: `{recorder.TargetPath}`\n" + | |
| "Recorder mixes all received participant audio into one timeline."); | |
| } | |
| // ── !recstop ─────────────────────────────── | |
| [Command("recstop"), Description("Stop recording and finalize output file.")] | |
| public async Task RecStopAsync(CommandContext ctx) | |
| { | |
| var summary = StopRecorder(); | |
| if (summary is null) | |
| { | |
| await ctx.RespondAsync("No active recording."); | |
| return; | |
| } | |
| await ctx.RespondAsync( | |
| "⏹️ Recording stopped.\n" + | |
| $"• file: `{summary.OutputPath}`\n" + | |
| $"• participants: `{summary.ParticipantCount}`\n" + | |
| $"• frames: `{summary.FrameCount}`\n" + | |
| $"• inserted silence frames: `{summary.InsertedSilenceFrames}`\n" + | |
| $"• captured bytes: `{summary.PcmBytes}`\n" + | |
| $"• participant stems: `{summary.StemDirectory ?? "n/a"}`"); | |
| } | |
| // ── !recstatus ───────────────────────────── | |
| [Command("recstatus"), Description("Show recording status.")] | |
| public async Task RecStatusAsync(CommandContext ctx) | |
| { | |
| ReceiveRecorder? recorder; | |
| lock (_recorderSync) | |
| recorder = _recorder; | |
| if (recorder is null) | |
| { | |
| await ctx.RespondAsync("No active recording."); | |
| return; | |
| } | |
| var snapshot = recorder.GetSnapshot(); | |
| await ctx.RespondAsync( | |
| "🎙️ Recording active.\n" + | |
| $"• target: `{snapshot.TargetPath}`\n" + | |
| $"• participants: `{snapshot.ParticipantCount}`\n" + | |
| $"• frames: `{snapshot.FrameCount}`\n" + | |
| $"• inserted silence frames: `{snapshot.InsertedSilenceFrames}`\n" + | |
| $"• captured bytes: `{snapshot.PcmBytes}`"); | |
| } | |
| // ── !dave ────────────────────────────────── | |
| [Command("dave"), Description("Print DAVE encryption status.")] | |
| public async Task DaveAsync(CommandContext ctx) | |
| { | |
| var voice = ctx.Client.GetVoice(); | |
| var conn = voice.GetConnection(ctx.Guild); | |
| if (conn is null) | |
| { | |
| await ctx.RespondAsync("Not connected."); | |
| return; | |
| } | |
| // VoiceConnection doesn't expose a public IsDaveActive property yet, | |
| // so we report configuration intent. | |
| await ctx.RespondAsync( | |
| "**DAVE status:**\n" + | |
| "• DAVE is enabled when `MaxDaveProtocolVersion = 1` (current config).\n" + | |
| "• Whether the server negotiated DAVE for this session is visible in the Debug logs " + | |
| "(look for `DAVE_VERSION` gateway opcode).\n" + | |
| "• If libdave was not found at startup, DAVE is automatically disabled and a " + | |
| "`LogError` was emitted."); | |
| } | |
| // ── !ping ────────────────────────────────── | |
| [Command("ping"), Description("Print voice gateway and UDP latency.")] | |
| public async Task PingAsync(CommandContext ctx) | |
| { | |
| var voice = ctx.Client.GetVoice(); | |
| var conn = voice.GetConnection(ctx.Guild); | |
| if (conn is null) | |
| { | |
| await ctx.RespondAsync("Not connected."); | |
| return; | |
| } | |
| await ctx.RespondAsync( | |
| $"🏓 **WebSocket ping:** {conn.WebSocketPing} ms\n" + | |
| $"📡 **UDP ping:** {conn.UdpPing} ms"); | |
| } | |
| // ── !vdebug [on|off|status] ──────────────── | |
| [Command("vdebug"), Description("Toggle or show voice debug logging for this active voice connection.")] | |
| public async Task VoiceDebugAsync( | |
| CommandContext ctx, | |
| [Description("on/off/status (default status).")] string mode = "status") | |
| { | |
| var voice = ctx.Client.GetVoice(); | |
| var conn = voice.GetConnection(ctx.Guild); | |
| if (conn is null) | |
| { | |
| await ctx.RespondAsync("Not connected."); | |
| return; | |
| } | |
| if (string.Equals(mode, "status", StringComparison.OrdinalIgnoreCase)) | |
| { | |
| await ctx.RespondAsync($"Voice debug logging is currently **{(conn.EnableDebugLogging ? "ON" : "OFF")}** for this connection."); | |
| return; | |
| } | |
| if (string.Equals(mode, "on", StringComparison.OrdinalIgnoreCase)) | |
| { | |
| conn.EnableDebugLogging = true; | |
| await ctx.RespondAsync("✅ Voice debug logging enabled for this connection."); | |
| return; | |
| } | |
| if (string.Equals(mode, "off", StringComparison.OrdinalIgnoreCase)) | |
| { | |
| conn.EnableDebugLogging = false; | |
| await ctx.RespondAsync("✅ Voice debug logging disabled for this connection."); | |
| return; | |
| } | |
| await ctx.RespondAsync("Usage: `!vdebug on`, `!vdebug off`, or `!vdebug status`."); | |
| } | |
| // ───────────────────────────────────────── | |
| // Event handlers | |
| // ───────────────────────────────────────── | |
| private static Task OnUserSpeaking(VoiceConnection conn, UserSpeakingEventArgs e) | |
| { | |
| Log.Logger.Debug( | |
| "[Voice] {User} is {State} (SSRC {Ssrc})", | |
| e.User?.Username ?? "unknown", | |
| e.Speaking is SpeakingFlags.NotSpeaking ? "silent" : "speaking", | |
| e.Ssrc); | |
| return Task.CompletedTask; | |
| } | |
| private static Task OnUserJoined(VoiceConnection conn, VoiceUserJoinEventArgs e) | |
| { | |
| Log.Logger.Information( | |
| "[Voice] {User} joined (SSRC {Ssrc})", e.User.Username, e.Ssrc); | |
| return Task.CompletedTask; | |
| } | |
| private static Task OnUserLeft(VoiceConnection conn, VoiceUserLeaveEventArgs e) | |
| { | |
| Log.Logger.Information( | |
| "[Voice] {User} left (SSRC {Ssrc})", e.User.Username, e.Ssrc); | |
| return Task.CompletedTask; | |
| } | |
| private static Task OnSocketError(VoiceConnection conn, SocketErrorEventArgs e) | |
| { | |
| Log.Logger.Error(e.Exception, "[Voice] WebSocket error"); | |
| return Task.CompletedTask; | |
| } | |
    /// <summary>
    /// Per-packet receive hook: feeds the recorder (if any) and, when listening,
    /// computes peak/RMS/activity metrics and loss estimates per SSRC.
    /// </summary>
    private static Task OnVoiceReceived(VoiceConnection conn, VoiceReceiveEventArgs e)
    {
        // The recorder is always fed, independent of the !listen toggle.
        ReceiveRecorder? recorder;
        lock (_recorderSync)
            recorder = _recorder;
        recorder?.TryWrite(e);
        if (!_listenActive)
            return Task.CompletedTask;
        // Scan the 16-bit little-endian PCM for peak, RMS and "activity".
        var pcm = e.PcmData.Span;
        var sampleCount = pcm.Length / 2;
        long sumSquares = 0;
        var peak = 0;
        var nonTrivialSamples = 0;
        for (var i = 0; i + 1 < pcm.Length; i += 2)
        {
            var sample = BinaryPrimitives.ReadInt16LittleEndian(pcm.Slice(i, 2));
            var abs = Math.Abs((int)sample);
            if (abs > peak)
                peak = abs;
            // Ignore near-zero quantization noise for "activity" checks.
            if (abs > 64)
                nonTrivialSamples++;
            sumSquares += (long)sample * sample;
        }
        // RMS normalized to [0, 1] against full-scale 16-bit.
        var rms = sampleCount > 0
            ? Math.Sqrt(sumSquares / (double)sampleCount) / short.MaxValue
            : 0d;
        var activityRatio = sampleCount > 0
            ? nonTrivialSamples / (double)sampleCount
            : 0d;
        // A "usable" decoded frame should carry more than just tiny dithering/noise.
        var usable = sampleCount > 0 && (rms >= 0.0025 || activityRatio >= 0.01);
        var diag = _receiveDiagnostics.GetOrAdd(e.Ssrc, static _ => new());
        var now = DateTimeOffset.UtcNow;
        var missingFrames = 0;
        lock (diag.Sync)
        {
            // Estimate dropped frames from the wall-clock gap since the last
            // packet, minus one nominal frame duration of jitter allowance.
            if (diag.LastPacketAt is { } lastPacketAt)
            {
                var gapMs = (now - lastPacketAt).TotalMilliseconds - e.AudioDuration;
                if (gapMs >= e.AudioDuration * 1.5)
                    missingFrames = (int)Math.Floor(gapMs / Math.Max(1, e.AudioDuration));
            }
            diag.LastPacketAt = now;
            diag.TotalFrames++;
            if (usable)
                diag.UsableFrames++;
            if (missingFrames > 0)
                diag.MissingFrames += missingFrames;
        }
        Log.Logger.Debug(
            "[Receive] SSRC={Ssrc} User={User} usable={Usable} rms={Rms:0.0000} active={Active:0.0%} peak={Peak} dur={Duration}ms PCM={PcmBytes}B Opus={OpusBytes}B missingFrames={MissingFrames}",
            e.Ssrc,
            e.User?.Username ?? "unknown",
            usable,
            rms,
            activityRatio,
            peak,
            e.AudioDuration,
            e.PcmData.Length,
            e.OpusData.Length,
            missingFrames);
        // Roll-up line every 50 frames per SSRC; lock re-acquired because the
        // log call above is deliberately kept outside any lock.
        lock (diag.Sync)
        {
            if (diag.TotalFrames % 50 == 0)
            {
                var usableRatio = diag.TotalFrames > 0
                    ? diag.UsableFrames / (double)diag.TotalFrames
                    : 0d;
                Log.Logger.Information(
                    "[ReceiveSummary] SSRC={Ssrc} frames={Frames} usable={Usable:0.0%} missingFrames={MissingFrames}",
                    e.Ssrc, diag.TotalFrames, usableRatio, diag.MissingFrames);
            }
        }
        return Task.CompletedTask;
    }
| // ───────────────────────────────────────── | |
| // Helpers | |
| // ───────────────────────────────────────── | |
| private static void StopFfmpeg() | |
| { | |
| _playCts?.Cancel(); | |
| _playCts = null; | |
| if (_ffmpeg is not null && !_ffmpeg.HasExited) | |
| { | |
| try { _ffmpeg.Kill(); } catch { /* already gone */ } | |
| } | |
| _ffmpeg = null; | |
| } | |
| private static RecordingSummary? StopRecorder() | |
| { | |
| ReceiveRecorder? recorder; | |
| lock (_recorderSync) | |
| { | |
| recorder = _recorder; | |
| _recorder = null; | |
| } | |
| if (recorder is null) | |
| return null; | |
| return recorder.Stop(); | |
| } | |
| private static bool TryParseRecordingFormat(string value, out RecordingFormat format) | |
| { | |
| if (string.Equals(value, "wav", StringComparison.OrdinalIgnoreCase)) | |
| { | |
| format = RecordingFormat.Wav; | |
| return true; | |
| } | |
| if (string.Equals(value, "mp3", StringComparison.OrdinalIgnoreCase)) | |
| { | |
| format = RecordingFormat.Mp3; | |
| return true; | |
| } | |
| format = default; | |
| return false; | |
| } | |
    // Per-SSRC receive statistics; all mutable properties are guarded by Sync.
    private sealed class ReceiveDiagnostics
    {
        // Guards every mutable property below.
        public Lock Sync { get; } = new();
        // Arrival time of the most recent packet; null until the first packet.
        public DateTimeOffset? LastPacketAt { get; set; }
        // Total frames received for this SSRC.
        public int TotalFrames { get; set; }
        // Frames whose RMS/activity cleared the "usable" threshold.
        public int UsableFrames { get; set; }
        // Frames presumed lost, inferred from inter-packet timing gaps.
        public int MissingFrames { get; set; }
    }
    // Output container for !recstart. Mp3 is transcoded from the mixed WAV at
    // stop time; Wav is written directly.
    private enum RecordingFormat
    {
        Wav,
        Mp3
    }
    // Point-in-time counters of an in-progress recording (surfaced by !recstatus).
    private sealed class RecordingSnapshot
    {
        // Final output path the recording will produce.
        public required string TargetPath { get; init; }
        public required int ParticipantCount { get; init; }
        public required int FrameCount { get; init; }
        public required int InsertedSilenceFrames { get; init; }
        public required long PcmBytes { get; init; }
    }
    // Final counters returned by ReceiveRecorder.Stop() (surfaced by !recstop).
    private sealed class RecordingSummary
    {
        // Actual output file (may be the WAV fallback if MP3 transcoding failed).
        public required string OutputPath { get; init; }
        // Directory of per-participant stem WAVs, or null when nothing was captured.
        public required string? StemDirectory { get; init; }
        public required int ParticipantCount { get; init; }
        public required int FrameCount { get; init; }
        public required int InsertedSilenceFrames { get; init; }
        public required long PcmBytes { get; init; }
    }
| private sealed class ReceiveRecorder | |
| { | |
        // Guards every mutable field below; all public methods take it.
        private readonly Lock _sync = new();
        // Path of the mixed WAV output (always written, even for an MP3 target).
        private readonly string _wavPath;
        // Directory receiving per-participant stem WAVs.
        private readonly string _stemsDirectory;
        private readonly RecordingFormat _format;
        // Open handle for the mixed WAV; its header is patched at Stop().
        private readonly FileStream _wavStream;
        // One buffered PCM track per SSRC.
        private readonly Dictionary<uint, ParticipantTrack> _tracks = [];
        // Every SSRC ever observed, including senders whose frames were rejected.
        private readonly HashSet<uint> _participantSsrcs = [];
        private readonly DateTimeOffset _startedAt = DateTimeOffset.UtcNow;
        private bool _stopped;
        // Session audio format, pinned by the first accepted frame.
        private AudioFormat _audioFormat;
        private bool _audioFormatSet;
        private DateTimeOffset? _lastPacketAt;
        // Frame geometry, also pinned by the first accepted frame (default 20 ms).
        private int _frameDurationMs = 20;
        private int _frameByteLength;
        // Reusable zero-filled frame used when inserting silence.
        private byte[] _silenceFrameBuffer = [];
        // Counters surfaced via GetSnapshot()/Stop().
        private int _frameCount;
        private int _insertedSilenceFrames;
        private long _pcmBytes;
        // Mutable per-SSRC capture state.
        private sealed class ParticipantTrack
        {
            // Raw concatenated PCM frames received from this participant.
            public MemoryStream Pcm { get; set; } = new();
            // Sanitized username (or "ssrc_<n>") used in stem filenames.
            public string Label { get; init; } = "unknown";
            // Silence frames owed before this track's first real frame,
            // i.e. how late the participant started relative to recorder start.
            public int StartOffsetFrames { get; set; }
            public int FramesWritten { get; set; }
        }
        /// <summary>
        /// Opens the mixed-output WAV file and reserves space for its header.
        /// </summary>
        /// <param name="basePath">Output path without extension; ".wav", ".mp3" and "_stems" are derived from it.</param>
        /// <param name="format">Requested output format; MP3 is transcoded from the WAV at stop time.</param>
        public ReceiveRecorder(string basePath, RecordingFormat format)
        {
            _format = format;
            _wavPath = basePath + ".wav";
            _stemsDirectory = basePath + "_stems";
            this.TargetPath = format == RecordingFormat.Mp3 ? basePath + ".mp3" : _wavPath;
            _wavStream = new FileStream(_wavPath, FileMode.Create, FileAccess.ReadWrite, FileShare.Read);
            WriteWavHeaderPlaceholder(_wavStream);
        }
        // Final output file the caller should surface (.mp3 when transcoding, else the .wav).
        public string TargetPath { get; }
        /// <summary>
        /// Appends one received PCM frame to the sender's track buffer.
        /// Thread-safe; no-op once stopped or for empty frames. Frames whose
        /// format or size disagrees with the first accepted frame are dropped.
        /// </summary>
        public void TryWrite(VoiceReceiveEventArgs e)
        {
            lock (_sync)
            {
                if (_stopped || e.PcmData.Length <= 0)
                    return;
                var now = DateTimeOffset.UtcNow;
                _participantSsrcs.Add(e.Ssrc);
                // First accepted frame pins the session format and frame geometry.
                if (!_audioFormatSet)
                {
                    _audioFormat = e.AudioFormat;
                    _audioFormatSet = true;
                    _frameDurationMs = Math.Max(1, e.AudioDuration);
                    _frameByteLength = e.PcmData.Length;
                    _silenceFrameBuffer = new byte[e.PcmData.Length];
                    Log.Logger.Information(
                        "[Recorder] Multi-user track capture initialized from SSRC={Ssrc} User={User} Format={Rate}Hz/{Channels}ch frame={Duration}ms",
                        e.Ssrc,
                        e.User?.Username ?? "unknown",
                        e.AudioFormat.SampleRate,
                        e.AudioFormat.ChannelCount,
                        _frameDurationMs);
                }
                // Mismatched sample rate / channel count: log and drop (no resampling).
                if (_audioFormatSet &&
                    (_audioFormat.SampleRate != e.AudioFormat.SampleRate || _audioFormat.ChannelCount != e.AudioFormat.ChannelCount))
                {
                    Log.Logger.Warning(
                        "[Recorder] Ignoring frame with format mismatch for SSRC={Ssrc} User={User}: got {Rate}Hz/{Channels}ch, expected {ExpectedRate}Hz/{ExpectedChannels}ch",
                        e.Ssrc,
                        e.User?.Username ?? "unknown",
                        e.AudioFormat.SampleRate,
                        e.AudioFormat.ChannelCount,
                        _audioFormat.SampleRate,
                        _audioFormat.ChannelCount);
                    return;
                }
                // A different byte length would desync the fixed-size frame timeline.
                if (e.PcmData.Length != _frameByteLength)
                {
                    Log.Logger.Warning(
                        "[Recorder] Ignoring frame with size mismatch for SSRC={Ssrc} User={User}: got {Bytes} bytes, expected {ExpectedBytes} bytes",
                        e.Ssrc,
                        e.User?.Username ?? "unknown",
                        e.PcmData.Length,
                        _frameByteLength);
                    return;
                }
                // Lazily create the participant's track; StartOffsetFrames records
                // the leading silence owed relative to recorder start.
                if (!_tracks.TryGetValue(e.Ssrc, out var track))
                {
                    track = new ParticipantTrack
                    {
                        Label = SanitizeFileName(e.User?.Username ?? $"ssrc_{e.Ssrc}"),
                        StartOffsetFrames = GetLeadingFrames(now)
                    };
                    _tracks[e.Ssrc] = track;
                }
                track.Pcm.Write(e.PcmData.Span);
                track.FramesWritten++;
                _lastPacketAt = now;
            }
        }
        /// <summary>
        /// Returns a mutually consistent point-in-time view of the recording
        /// counters (all read under the single recorder lock).
        /// </summary>
        public RecordingSnapshot GetSnapshot()
        {
            lock (_sync)
            {
                return new RecordingSnapshot
                {
                    TargetPath = this.TargetPath,
                    ParticipantCount = _participantSsrcs.Count,
                    FrameCount = GetCurrentFrameCount(),
                    InsertedSilenceFrames = _insertedSilenceFrames,
                    PcmBytes = _pcmBytes
                };
            }
        }
        /// <summary>
        /// Finalizes the recording: exports per-participant stems, aligns and
        /// mixes all tracks into the WAV, optionally transcodes to MP3, and
        /// returns a summary. Idempotent — a repeat call reports existing state.
        /// </summary>
        public RecordingSummary Stop()
        {
            lock (_sync)
            {
                if (_stopped)
                {
                    // Already finalized: report counters without re-mixing.
                    // NOTE(review): if the first call fell back to WAV after a failed
                    // MP3 transcode, this repeat call still reports the .mp3
                    // TargetPath — confirm that is acceptable.
                    return new RecordingSummary
                    {
                        OutputPath = this.TargetPath,
                        StemDirectory = _tracks.Count > 0 ? _stemsDirectory : null,
                        ParticipantCount = _participantSsrcs.Count,
                        FrameCount = _frameCount,
                        InsertedSilenceFrames = _insertedSilenceFrames,
                        PcmBytes = _pcmBytes
                    };
                }
                _stopped = true;
                if (_audioFormatSet)
                {
                    var finalFrameCount = GetCurrentFrameCount();
                    // Extend the timeline with trailing silence if the last packet
                    // arrived noticeably before Stop() was called.
                    if (_lastPacketAt is { } lastPacketAt)
                    {
                        var trailingMs = (DateTimeOffset.UtcNow - lastPacketAt).TotalMilliseconds;
                        if (trailingMs >= _frameDurationMs * 1.5)
                            finalFrameCount += (int)Math.Floor(trailingMs / _frameDurationMs);
                    }
                    ExportStemFiles();
                    AlignTracks(finalFrameCount);
                    MixTracksToWav(finalFrameCount);
                }
                // Patch the WAV header with the true data length, then release the file.
                FinalizeWavFile(_wavStream, _pcmBytes);
                _wavStream.Dispose();
                var outputPath = this.TargetPath;
                if (_format == RecordingFormat.Mp3)
                {
                    if (!TranscodeToMp3(_wavPath, outputPath))
                    {
                        // Best-effort fallback: keep the intermediate WAV.
                        outputPath = _wavPath;
                        Log.Logger.Warning("[Recorder] MP3 transcoding failed. Keeping WAV output at {Path}", outputPath);
                    }
                }
                var durationSec = (DateTimeOffset.UtcNow - _startedAt).TotalSeconds;
                Log.Logger.Information(
                    "[Recorder] Finalized {Path} ({Participants} participants, {Frames} frames, {SilenceFrames} inserted silence frames, {Bytes} PCM bytes, {Duration:0.0}s)",
                    outputPath, _participantSsrcs.Count, _frameCount, _insertedSilenceFrames, _pcmBytes, durationSec);
                return new RecordingSummary
                {
                    OutputPath = outputPath,
                    StemDirectory = _tracks.Count > 0 ? _stemsDirectory : null,
                    ParticipantCount = _participantSsrcs.Count,
                    FrameCount = _frameCount,
                    InsertedSilenceFrames = _insertedSilenceFrames,
                    PcmBytes = _pcmBytes
                };
            }
        }
| private int GetLeadingFrames(DateTimeOffset firstPacketAt) | |
| { | |
| var leadingMs = (firstPacketAt - _startedAt).TotalMilliseconds - _frameDurationMs; | |
| if (leadingMs < _frameDurationMs * 1.5) | |
| return 0; | |
| return (int)Math.Floor(leadingMs / _frameDurationMs); | |
| } | |
| private int GetCurrentFrameCount() | |
| { | |
| if (_tracks.Count == 0) | |
| return 0; | |
| return _tracks.Values.Max(track => track.StartOffsetFrames + track.FramesWritten); | |
| } | |
        /// <summary>
        /// Pads every track to a common timeline: leading silence for late
        /// joiners (StartOffsetFrames) and trailing silence up to frameCount.
        /// Caller must hold _sync.
        /// </summary>
        private void AlignTracks(int frameCount)
        {
            foreach (var track in _tracks.Values)
            {
                if (track.StartOffsetFrames > 0)
                {
                    // Rebuild the buffer with leading silence so all tracks start at t=0.
                    var aligned = new MemoryStream();
                    WriteSilenceFrames(aligned, track.StartOffsetFrames);
                    track.Pcm.Position = 0;
                    track.Pcm.CopyTo(aligned);
                    track.Pcm.Dispose();
                    track.Pcm = aligned;
                    track.FramesWritten += track.StartOffsetFrames;
                    _insertedSilenceFrames += track.StartOffsetFrames;
                    track.StartOffsetFrames = 0;
                }
                // Pad the tail so every track ends up with exactly frameCount frames.
                var missing = frameCount - track.FramesWritten;
                if (missing <= 0)
                    continue;
                WriteSilenceFrames(track.Pcm, missing);
                track.FramesWritten += missing;
                _insertedSilenceFrames += missing;
            }
        }
        /// <summary>
        /// Mixes all aligned tracks frame-by-frame into the WAV stream. Each
        /// sample is averaged over its *active* contributors only (so one loud
        /// speaker is not attenuated by silent participants), scaled by 0.98 for
        /// headroom, and clamped to 16-bit. Caller must hold _sync and must have
        /// called AlignTracks(frameCount) first.
        /// </summary>
        private void MixTracksToWav(int frameCount)
        {
            if (_tracks.Count == 0 || _frameByteLength <= 0)
                return;
            var sampleCount = _frameByteLength / sizeof(short);
            var mixedSamples = new int[sampleCount];
            var activeContributors = new int[sampleCount];
            var mixedFrame = new byte[_frameByteLength];
            // Rewind every track once, then read them in lock-step one frame at a time.
            var readers = new (MemoryStream Stream, byte[] Buffer)[_tracks.Count];
            var idx = 0;
            foreach (var track in _tracks.Values)
            {
                track.Pcm.Position = 0;
                readers[idx++] = (track.Pcm, new byte[_frameByteLength]);
            }
            for (var frame = 0; frame < frameCount; frame++)
            {
                Array.Clear(mixedSamples, 0, mixedSamples.Length);
                Array.Clear(activeContributors, 0, activeContributors.Length);
                for (var r = 0; r < readers.Length; r++)
                {
                    var reader = readers[r];
                    var read = reader.Stream.Read(reader.Buffer, 0, reader.Buffer.Length);
                    if (read <= 0)
                        continue;
                    // Zero-pad a short final read so stale bytes don't leak into the mix.
                    if (read < reader.Buffer.Length)
                        Array.Clear(reader.Buffer, read, reader.Buffer.Length - read);
                    var span = reader.Buffer.AsSpan();
                    for (var i = 0; i < sampleCount; i++)
                    {
                        var sample = BinaryPrimitives.ReadInt16LittleEndian(span.Slice(i * sizeof(short), sizeof(short)));
                        mixedSamples[i] += sample;
                        // Samples above the noise floor count as active contributors.
                        if (Math.Abs(sample) > 64)
                            activeContributors[i]++;
                    }
                }
                var output = mixedFrame.AsSpan();
                for (var i = 0; i < sampleCount; i++)
                {
                    var contributors = Math.Max(1, activeContributors[i]);
                    var mixed = (mixedSamples[i] / (double)contributors) * 0.98;
                    var clamped = Math.Clamp((int)Math.Round(mixed), short.MinValue, short.MaxValue);
                    BinaryPrimitives.WriteInt16LittleEndian(output.Slice(i * sizeof(short), sizeof(short)), (short)clamped);
                }
                _wavStream.Write(mixedFrame);
                _pcmBytes += mixedFrame.Length;
                _frameCount++;
            }
        }
/// <summary>
/// Writes every participant's individual PCM track to its own WAV file under
/// the stems directory (created on demand), named "{ssrc}_{label}.wav".
/// Each file gets a placeholder header that is finalized once the PCM is copied.
/// </summary>
private void ExportStemFiles()
{
	Directory.CreateDirectory(_stemsDirectory);
	foreach (var (ssrc, track) in _tracks)
	{
		var stemPath = Path.Combine(_stemsDirectory, $"{ssrc}_{track.Label}.wav");
		using var stemFile = new FileStream(stemPath, FileMode.Create, FileAccess.ReadWrite, FileShare.Read);

		WriteWavHeaderPlaceholder(stemFile);
		track.Pcm.Position = 0;
		track.Pcm.CopyTo(stemFile);
		FinalizeWavFile(stemFile, track.Pcm.Length);
	}
}
/// <summary>
/// Appends <paramref name="frameCount"/> frames of pre-rendered silence to the
/// stream. No-op for non-positive counts or when no silence buffer exists.
/// </summary>
/// <param name="stream">Destination PCM stream.</param>
/// <param name="frameCount">Number of silence frames to append.</param>
private void WriteSilenceFrames(Stream stream, int frameCount)
{
	if (frameCount <= 0 || _silenceFrameBuffer.Length == 0)
		return;

	var remaining = frameCount;
	while (remaining-- > 0)
		stream.Write(_silenceFrameBuffer);
}
/// <summary>
/// Transcodes a finished WAV recording to MP3 by invoking an external ffmpeg
/// process (libmp3lame, VBR quality 2). On success the source WAV is deleted.
/// Requires ffmpeg on PATH.
/// </summary>
/// <param name="wavPath">Path of the source WAV file.</param>
/// <param name="mp3Path">Destination path for the MP3 output.</param>
/// <returns><see langword="true"/> when ffmpeg exited with code 0; otherwise <see langword="false"/>.</returns>
private static bool TranscodeToMp3(string wavPath, string mp3Path)
{
	try
	{
		var startInfo = new ProcessStartInfo
		{
			FileName = "ffmpeg",
			RedirectStandardError = true,
			UseShellExecute = false
		};
		// Fix: the previous interpolated Arguments string broke on paths that
		// contain quote characters. ArgumentList quotes each argument correctly
		// per platform, so arbitrary file names cannot corrupt the command line.
		foreach (var arg in new[] { "-y", "-hide_banner", "-loglevel", "error", "-i", wavPath, "-codec:a", "libmp3lame", "-q:a", "2", mp3Path })
			startInfo.ArgumentList.Add(arg);

		using var ffmpeg = Process.Start(startInfo);
		if (ffmpeg is null)
			return false;

		// Drain stderr before waiting so a chatty ffmpeg cannot deadlock on a full pipe.
		var stderr = ffmpeg.StandardError.ReadToEnd();
		ffmpeg.WaitForExit();
		if (ffmpeg.ExitCode != 0)
		{
			Log.Logger.Error("[Recorder] ffmpeg failed with exit code {ExitCode}: {Stderr}", ffmpeg.ExitCode, stderr);
			return false;
		}

		// Only delete the WAV when the MP3 actually materialized.
		if (File.Exists(mp3Path))
			File.Delete(wavPath);
		return true;
	}
	catch (Exception ex)
	{
		Log.Logger.Error(ex, "[Recorder] Exception while transcoding WAV to MP3");
		return false;
	}
}
/// <summary>
/// Replaces every character that is invalid in a file name with '_' and trims
/// surrounding whitespace. Returns "unknown" when nothing usable remains.
/// </summary>
/// <param name="value">Candidate file name (e.g. a display name).</param>
/// <returns>A file-system-safe, non-empty name.</returns>
private static string SanitizeFileName(string value)
{
	// Fix: Path.GetInvalidFileNameChars() allocates a fresh array on every call;
	// the old code invoked it once per character. Fetch it a single time.
	var invalid = Path.GetInvalidFileNameChars();
	var chars = value.ToCharArray();
	for (var i = 0; i < chars.Length; i++)
	{
		if (Array.IndexOf(invalid, chars[i]) >= 0)
			chars[i] = '_';
	}
	var sanitized = new string(chars).Trim();
	return string.IsNullOrWhiteSpace(sanitized) ? "unknown" : sanitized;
}
/// <summary>
/// Writes a 44-byte canonical WAV header with only the PCM constants filled in
/// (fmt chunk size 16, format tag 1, 16 bits per sample); all size, channel and
/// rate fields stay zero. <see cref="FinalizeWavFile"/> later seeks back and
/// overwrites this header with the real values.
/// </summary>
/// <param name="stream">Destination stream positioned at its start.</param>
private static void WriteWavHeaderPlaceholder(Stream stream)
{
	Span<byte> header = stackalloc byte[44];
	header.Clear();

	"RIFF"u8.CopyTo(header[..4]);
	"WAVE"u8.CopyTo(header.Slice(8, 4));
	"fmt "u8.CopyTo(header.Slice(12, 4));
	BinaryPrimitives.WriteInt32LittleEndian(header.Slice(16, 4), 16); // fmt chunk size
	BinaryPrimitives.WriteInt16LittleEndian(header.Slice(20, 2), 1);  // PCM format tag
	BinaryPrimitives.WriteInt16LittleEndian(header.Slice(34, 2), 16); // bits per sample
	"data"u8.CopyTo(header.Slice(36, 4));

	stream.Write(header);
}
/// <summary>
/// Rewrites the 44-byte WAV header at the start of the stream with the real
/// channel count, sample rate, byte rate, block align and data length now that
/// the PCM payload is complete. Falls back to <c>AudioFormat.Default</c> when no
/// format was ever set. Note: RIFF/data sizes are 32-bit fields, an inherent
/// limit of the classic WAV format.
/// </summary>
/// <param name="stream">Seekable stream whose first 44 bytes hold the placeholder header.</param>
/// <param name="pcmBytes">Total PCM payload length in bytes.</param>
private void FinalizeWavFile(Stream stream, long pcmBytes)
{
	if (!_audioFormatSet)
		_audioFormat = AudioFormat.Default;

	var channels = (short)_audioFormat.ChannelCount;
	var sampleRate = _audioFormat.SampleRate;
	var blockAlign = (short)(_audioFormat.ChannelCount * sizeof(short));
	var byteRate = sampleRate * _audioFormat.ChannelCount * sizeof(short);

	Span<byte> header = stackalloc byte[44];
	header.Clear();
	"RIFF"u8.CopyTo(header[..4]);
	BinaryPrimitives.WriteInt32LittleEndian(header.Slice(4, 4), (int)(36 + pcmBytes));
	"WAVE"u8.CopyTo(header.Slice(8, 4));
	"fmt "u8.CopyTo(header.Slice(12, 4));
	BinaryPrimitives.WriteInt32LittleEndian(header.Slice(16, 4), 16); // fmt chunk size
	BinaryPrimitives.WriteInt16LittleEndian(header.Slice(20, 2), 1);  // PCM format tag
	BinaryPrimitives.WriteInt16LittleEndian(header.Slice(22, 2), channels);
	BinaryPrimitives.WriteInt32LittleEndian(header.Slice(24, 4), sampleRate);
	BinaryPrimitives.WriteInt32LittleEndian(header.Slice(28, 4), byteRate);
	BinaryPrimitives.WriteInt16LittleEndian(header.Slice(32, 2), blockAlign);
	BinaryPrimitives.WriteInt16LittleEndian(header.Slice(34, 2), 16); // bits per sample
	"data"u8.CopyTo(header.Slice(36, 4));
	BinaryPrimitives.WriteInt32LittleEndian(header.Slice(40, 4), (int)pcmBytes);

	stream.Seek(0, SeekOrigin.Begin);
	stream.Write(header);
	stream.Flush();
}
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!-- Project file for the VoiceTestBot console app (see VoiceTestBot.cs). -->
<Project Sdk="Microsoft.NET.Sdk">
	<PropertyGroup>
		<OutputType>Exe</OutputType>
		<TargetFramework>net10.0</TargetFramework>
		<ImplicitUsings>enable</ImplicitUsings>
		<Nullable>enable</Nullable>
		<!-- NuGet vulnerability auditing is disabled for this test project. -->
		<NuGetAuditSuppress>true</NuGetAuditSuppress>
		<NuGetAudit>false</NuGetAudit>
	</PropertyGroup>
	<ItemGroup>
		<!-- NOTE(review): the VoiceTestBot.cs header lists DisCatSharp.Voice.Natives as a
		     required package, but it is not referenced here. Confirm DisCatSharp.Voice pulls
		     it in transitively; otherwise the native voice binaries will be missing. -->
		<PackageReference Include="DisCatSharp" Version="10.7.0-nightly-069" />
		<PackageReference Include="DisCatSharp.CommandsNext" Version="10.7.0-nightly-069" />
		<PackageReference Include="DisCatSharp.Voice" Version="10.7.0-nightly-069" />
		<PackageReference Include="Microsoft.Extensions.DependencyInjection" Version="10.0.3" />
		<PackageReference Include="Microsoft.Extensions.Logging" Version="10.0.3" />
		<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.3" />
		<PackageReference Include="Serilog" Version="4.3.1" />
		<PackageReference Include="Serilog.Extensions.Logging" Version="10.0.0" />
		<PackageReference Include="Serilog.Sinks.Console" Version="6.1.1" />
		<PackageReference Include="Serilog.Sinks.Debug" Version="3.0.0" />
		<PackageReference Include="Serilog.Sinks.File" Version="7.0.0" />
		<!-- NOTE(review): System.Memory is part of the shared framework on net10.0 —
		     this explicit reference is likely redundant; confirm before removing. -->
		<PackageReference Include="System.Memory" Version="4.6.3" />
	</ItemGroup>
</Project>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment