Skip to content

Instantly share code, notes, and snippets.

@Lulalaby
Last active March 8, 2026 23:13
Show Gist options
  • Select an option

  • Save Lulalaby/16ffc1263a7e60e438a11978e062f3a9 to your computer and use it in GitHub Desktop.

Select an option

Save Lulalaby/16ffc1263a7e60e438a11978e062f3a9 to your computer and use it in GitHub Desktop.
VoiceTestBot
// VoiceTestBot.cs
// Drop this file into a console app that references DisCatSharp, DisCatSharp.Voice,
// DisCatSharp.Voice.Natives, and DisCatSharp.CommandsNext.
//
// Required NuGet packages:
// DisCatSharp
// DisCatSharp.Voice
// DisCatSharp.Voice.Natives
// DisCatSharp.CommandsNext
//
// Set environment variable DISCORD_TOKEN before running.
// Optional: set DISCORD_PREFIX (default: "!")
//
// Available commands:
// !join [channel] — join caller's voice channel or an explicit voice/stage channel
// !leave — disconnect from voice
// !play <path> — stream an audio file through ffmpeg (requires ffmpeg in PATH)
// !stop — stop playback
// !pause — pause playback
// !resume — resume playback
// !listen — toggle voice receive (dumps PCM packet info to console)
// !recstart [fmt] — start recording incoming audio (fmt: wav/mp3)
// !recstop — stop recording and finalize output file
// !recstatus — print recording status
// !dave — print current DAVE encryption status
// !vdebug [mode] — voice debug logs for current connection (mode: on/off/status)
// !ping — print WebSocket + UDP latency
using System.Diagnostics;
using System.Collections.Concurrent;
using System.Buffers.Binary;
using System.Threading;
using DisCatSharp;
using DisCatSharp.CommandsNext;
using DisCatSharp.CommandsNext.Attributes;
using DisCatSharp.Entities;
using DisCatSharp.Enums;
using DisCatSharp.EventArgs;
using DisCatSharp.Voice;
using DisCatSharp.Voice.EventArgs;
using Microsoft.Extensions.Logging;
using Serilog;
// ─────────────────────────────────────────────
// Entry point
// ─────────────────────────────────────────────
// Boot sequence: configure Serilog → build the Discord client → enable the
// voice extension and CommandsNext → connect → block forever.
var logDirectory = Path.Combine(Environment.CurrentDirectory, "logs");
Directory.CreateDirectory(logDirectory);
var logPath = Path.Combine(logDirectory, "voice_test_bot.log");
// Verbose root logger with console, debugger, and hourly-rolling file sinks.
Log.Logger = new LoggerConfiguration()
    .MinimumLevel.Verbose()
    .WriteTo.Console(outputTemplate: "[{Timestamp:HH:mm:ss} {Level:u3}] {SourceContext} - {Message:lj}{NewLine}{Exception}")
    .WriteTo.Debug(outputTemplate: "[{Timestamp:HH:mm:ss} {Level:u3}] {SourceContext} - {Message:lj}{NewLine}{Exception}")
    .WriteTo.File(logPath, rollingInterval: RollingInterval.Hour, outputTemplate: "[{Timestamp:HH:mm:ss} {Level:u3}] {SourceContext} - {Message:lj}{NewLine}{Exception}")
    .CreateLogger();
// Fail fast when no token is configured; command prefix defaults to "!".
var token = Environment.GetEnvironmentVariable("DISCORD_TOKEN") ?? throw new InvalidOperationException("DISCORD_TOKEN not set");
var prefix = Environment.GetEnvironmentVariable("DISCORD_PREFIX") ?? "!";
var client = new DiscordClient(new DiscordConfiguration
{
    Token = token,
    TokenType = TokenType.Bot,
    // NOTE(review): MessageContent is typically a privileged intent — confirm
    // it is also enabled for the bot in the Discord developer portal.
    Intents = DiscordIntents.AllUnprivileged | DiscordIntents.MessageContent,
    MinimumLogLevel = LogLevel.Trace,
    // dispose: false — the Serilog root logger outlives the client.
    LoggerFactory = new Serilog.Extensions.Logging.SerilogLoggerFactory(Log.Logger, dispose: false),
});
// ── Voice ──────────────────────────────────────
client.UseVoice(new VoiceConfiguration
{
    EnableIncoming = true, // receive incoming audio
    MaxDaveProtocolVersion = 1, // enable DAVE E2EE (set to 0 to test without DAVE)
    EnableDebugLogging = true, // toggles all debug/trace logs inside DisCatSharp.Voice
    DavePendingAudioBehavior = DavePendingAudioBehavior.PassThrough // set Drop to block send until DAVE is Active
});
// ── Commands ───────────────────────────────────
var commands = client.UseCommandsNext(new CommandsNextConfiguration
{
    StringPrefixes = [prefix],
    EnableDms = false,
});
commands.RegisterCommands<VoiceCommands>();
// ── Global voice diagnostics ───────────────────
client.Ready += (c, e) =>
{
    c.Logger.LogInformation("[Bot] Ready as {User}", c.CurrentUser.Username);
    return Task.CompletedTask;
};
await client.ConnectAsync();
// Park the main task forever; the process exits only on Ctrl+C / kill.
await Task.Delay(Timeout.Infinite);
// ─────────────────────────────────────────────
// Voice command module
// ─────────────────────────────────────────────
public sealed class VoiceCommands : BaseCommandModule
{
    // NOTE(review): all state below is static and process-wide, so the module
    // is only correct for one guild at a time (as the _ffmpeg comment below
    // acknowledges) — confirm before running in multiple guilds.
    // Whether receive logging is active (global switch, not per guild).
    private static bool _listenActive;
    // Per-SSRC packet statistics collected by OnVoiceReceived.
    private static readonly ConcurrentDictionary<uint, ReceiveDiagnostics> _receiveDiagnostics = new();
    // Guards _recorder swaps; recorder methods do their own internal locking.
    private static readonly Lock _recorderSync = new();
    private static ReceiveRecorder? _recorder;
    // Track the current ffmpeg process per guild (single-guild test bot).
    private static Process? _ffmpeg;
    // Cancels the in-flight StreamFileAsync pipeline.
    private static CancellationTokenSource? _playCts;
    // ── !join ──────────────────────────────────
    // Joins the explicitly given channel, or the caller's current voice channel.
    [Command("join"), Description("Join your voice channel, or provide a target voice/stage channel.")]
    public async Task JoinAsync(CommandContext ctx, [Description("Optional target channel mention/id/name.")] DiscordChannel? channel = null)
    {
        var targetChannel = channel ?? ctx.Member?.VoiceState?.Channel;
        if (targetChannel is null)
        {
            await ctx.RespondAsync("You are not in a voice channel. Use `!join <voice-channel>` to pick one explicitly.");
            return;
        }
        if (targetChannel.Type is not (ChannelType.Voice or ChannelType.Stage))
        {
            await ctx.RespondAsync($"`{targetChannel.Name}` is not a voice or stage channel.");
            return;
        }
        var voice = ctx.Client.GetVoice();
        var conn = await voice.ConnectAsync(targetChannel);
        // ── Wire per-connection events ──────────
        // NOTE(review): handlers are attached on every !join and never detached
        // (not even in !leave) — confirm ConnectAsync always returns a fresh
        // connection object, otherwise these stack and fire multiple times.
        conn.UserSpeaking += OnUserSpeaking;
        conn.UserJoined += OnUserJoined;
        conn.UserLeft += OnUserLeft;
        conn.VoiceSocketErrored += OnSocketError;
        conn.VoiceReceived += OnVoiceReceived;
        ctx.Client.Logger.LogInformation(
            "[Voice] Connected to {Channel} in {Guild}", targetChannel.Name, targetChannel.Guild.Name);
        await ctx.RespondAsync($"✅ Joined **{targetChannel.Name}**. DAVE is negotiated automatically — check logs for encryption status.");
    }
// ── !leave ─────────────────────────────────
[Command("leave"), Description("Disconnect from voice.")]
public async Task LeaveAsync(CommandContext ctx)
{
var voice = ctx.Client.GetVoice();
var conn = voice.GetConnection(ctx.Guild);
if (conn is null)
{
await ctx.RespondAsync("Not connected.");
return;
}
StopFfmpeg();
StopRecorder();
conn.Disconnect();
await ctx.RespondAsync("👋 Disconnected.");
}
    // ── !play <path> ───────────────────────────
    // Resolves the given path and streams it through ffmpeg into the voice
    // connection. Any previous playback is stopped first.
    [Command("play"), Description("Play an audio file (requires ffmpeg in PATH).")]
    public async Task PlayAsync(CommandContext ctx, [RemainingText, Description("Path or URL to audio file.")] string path)
    {
        var voice = ctx.Client.GetVoice();
        var conn = voice.GetConnection(ctx.Guild);
        if (conn is null)
        {
            await ctx.RespondAsync("Not connected — use `!join` first.");
            return;
        }
        // NOTE(review): despite the parameter description, URLs cannot pass
        // TryResolveAudioPath (it probes with File.Exists) — local files only.
        if (!TryResolveAudioPath(path, out var resolvedPath))
        {
            await ctx.RespondAsync(
                "File not found.\n" +
                $"• input: `{path}`\n" +
                $"• cwd: `{Environment.CurrentDirectory}`\n" +
                $"• base: `{AppContext.BaseDirectory}`");
            return;
        }
        // Cancel/kill any previous playback before starting a new one.
        StopFfmpeg();
        _playCts = new CancellationTokenSource();
        await ctx.RespondAsync($"▶️ Playing `{Path.GetFileName(resolvedPath)}`…");
        // Fire-and-forget so the command returns immediately.
        // NOTE(review): a concurrent !stop can null _playCts between the
        // assignment above and the read below — rare race, worth confirming.
        _ = Task.Run(() => StreamFileAsync(conn, resolvedPath, _playCts.Token), _playCts.Token);
    }
private static bool TryResolveAudioPath(string path, out string resolvedPath)
{
resolvedPath = path;
if (string.IsNullOrWhiteSpace(path))
return false;
// Normalize accidental shell-style quotes from command input.
var trimmed = path.Trim().Trim('"');
if (Path.IsPathRooted(trimmed))
{
if (File.Exists(trimmed))
{
resolvedPath = trimmed;
return true;
}
return false;
}
var candidates = new[]
{
trimmed,
Path.Combine(Environment.CurrentDirectory, trimmed),
Path.Combine(AppContext.BaseDirectory, trimmed)
};
foreach (var candidate in candidates)
{
if (!File.Exists(candidate))
continue;
resolvedPath = candidate;
return true;
}
return false;
}
private static async Task StreamFileAsync(VoiceConnection conn, string path, CancellationToken ct)
{
var transmit = conn.GetTransmitSink();
var log = Log.Logger;
_ffmpeg = Process.Start(new ProcessStartInfo
{
FileName = "ffmpeg",
Arguments = $"-hide_banner -loglevel error -i \"{path}\" -ac 2 -f s16le -ar 48000 pipe:1",
RedirectStandardOutput = true,
RedirectStandardError = true,
UseShellExecute = false,
})!;
// Drain ffmpeg stderr asynchronously so it never blocks the audio pipe.
var localFfmpeg = _ffmpeg;
_ = Task.Run(async () =>
{
string? line;
while ((line = await localFfmpeg.StandardError.ReadLineAsync().ConfigureAwait(false)) is not null)
log.Error("[VoiceTest] ffmpeg stderr: {Line}", line);
}, CancellationToken.None);
try
{
await conn.SendSpeakingAsync(SpeakingFlags.Microphone);
var pcm = _ffmpeg.StandardOutput.BaseStream;
var buf = new byte[transmit.SampleLength];
int read;
var totalBytes = 0;
while (!ct.IsCancellationRequested &&
(read = await pcm.ReadAsync(buf.AsMemory(0, buf.Length), ct)) > 0)
{
totalBytes += read;
log.Debug("[VoiceTest] ffmpeg produced {Bytes} bytes of PCM (total: {Total})", read, totalBytes);
await transmit.WriteAsync(buf.AsMemory(0, read), ct);
}
log.Information("[VoiceTest] PCM stream ended — {Total} bytes fed to transmit sink", totalBytes);
await transmit.FlushAsync(ct);
await conn.WaitForPlaybackFinishAsync();
}
catch (OperationCanceledException) { /* stopped intentionally */ }
catch (Exception ex)
{
log.Error(ex, "[VoiceTest] Exception in StreamFileAsync for path={Path}", path);
}
finally
{
await conn.SendSpeakingAsync(SpeakingFlags.NotSpeaking);
StopFfmpeg();
}
}
// ── !stop ──────────────────────────────────
[Command("stop"), Description("Stop playback.")]
public async Task StopAsync(CommandContext ctx)
{
StopFfmpeg();
await ctx.RespondAsync("⏹️ Stopped.");
}
// ── !pause ─────────────────────────────────
[Command("pause"), Description("Pause playback.")]
public async Task PauseAsync(CommandContext ctx)
{
var voice = ctx.Client.GetVoice();
var conn = voice.GetConnection(ctx.Guild);
conn?.Pause();
await ctx.RespondAsync("⏸️ Paused.");
}
// ── !resume ────────────────────────────────
[Command("resume"), Description("Resume playback.")]
public async Task ResumeAsync(CommandContext ctx)
{
var voice = ctx.Client.GetVoice();
var conn = voice.GetConnection(ctx.Guild);
if (conn is not null)
await conn.ResumeAsync();
await ctx.RespondAsync("▶️ Resumed.");
}
// ── !listen ────────────────────────────────
[Command("listen"), Description("Toggle receive logging (prints packet info to console).")]
public async Task ListenAsync(CommandContext ctx)
{
_listenActive = !_listenActive;
await ctx.RespondAsync(_listenActive
? "🎧 Receive logging **ON** — incoming packets will be printed to console."
: "🔇 Receive logging **OFF**.");
}
    // ── !recstart [wav|mp3] ───────────────────
    // Starts a new ReceiveRecorder (or reports the one already running) and
    // force-enables receive logging so packet flow is visible.
    [Command("recstart"), Description("Start recording incoming audio to logs/recordings (wav/mp3).")]
    public async Task RecStartAsync(CommandContext ctx, [Description("Output format: wav or mp3 (default wav).")] string format = "wav")
    {
        var voice = ctx.Client.GetVoice();
        var conn = voice.GetConnection(ctx.Guild);
        if (conn is null)
        {
            await ctx.RespondAsync("Not connected — use `!join` first.");
            return;
        }
        if (!TryParseRecordingFormat(format, out var recordingFormat))
        {
            await ctx.RespondAsync("Invalid format. Use `wav` or `mp3`.");
            return;
        }
        // Create-or-report is decided under the lock; the responses are sent
        // afterwards so we never await while holding _recorderSync.
        ReceiveRecorder recorder;
        var alreadyRunningName = string.Empty;
        lock (_recorderSync)
        {
            if (_recorder is not null)
            {
                alreadyRunningName = Path.GetFileName(_recorder.TargetPath);
                recorder = _recorder;
            }
            else
            {
                var recordingsDirectory = Path.Combine(Environment.CurrentDirectory, "logs", "recordings");
                Directory.CreateDirectory(recordingsDirectory);
                var timestamp = DateTimeOffset.UtcNow.ToString("yyyyMMdd_HHmmss");
                var basePath = Path.Combine(recordingsDirectory, $"voice_{ctx.Guild.Id}_{timestamp}");
                recorder = new ReceiveRecorder(basePath, recordingFormat);
                _recorder = recorder;
            }
        }
        if (!string.IsNullOrEmpty(alreadyRunningName))
        {
            await ctx.RespondAsync($"Recording already running: `{alreadyRunningName}`");
            return;
        }
        // Recording implies receive logging.
        _listenActive = true;
        await ctx.RespondAsync(
            $"⏺️ Recording started.\n" +
            $"• format: `{recordingFormat.ToString().ToLowerInvariant()}`\n" +
            $"• output: `{recorder.TargetPath}`\n" +
            "Recorder mixes all received participant audio into one timeline.");
    }
    // ── !recstop ───────────────────────────────
    // Detaches and finalizes the active recorder, then reports its summary.
    [Command("recstop"), Description("Stop recording and finalize output file.")]
    public async Task RecStopAsync(CommandContext ctx)
    {
        var summary = StopRecorder();
        if (summary is null)
        {
            await ctx.RespondAsync("No active recording.");
            return;
        }
        await ctx.RespondAsync(
            "⏹️ Recording stopped.\n" +
            $"• file: `{summary.OutputPath}`\n" +
            $"• participants: `{summary.ParticipantCount}`\n" +
            $"• frames: `{summary.FrameCount}`\n" +
            $"• inserted silence frames: `{summary.InsertedSilenceFrames}`\n" +
            $"• captured bytes: `{summary.PcmBytes}`\n" +
            $"• participant stems: `{summary.StemDirectory ?? "n/a"}`");
    }
    // ── !recstatus ─────────────────────────────
    // Prints a point-in-time snapshot of the running recorder, if any.
    [Command("recstatus"), Description("Show recording status.")]
    public async Task RecStatusAsync(CommandContext ctx)
    {
        // Snapshot the recorder reference under the lock, await outside it.
        ReceiveRecorder? recorder;
        lock (_recorderSync)
            recorder = _recorder;
        if (recorder is null)
        {
            await ctx.RespondAsync("No active recording.");
            return;
        }
        var snapshot = recorder.GetSnapshot();
        await ctx.RespondAsync(
            "🎙️ Recording active.\n" +
            $"• target: `{snapshot.TargetPath}`\n" +
            $"• participants: `{snapshot.ParticipantCount}`\n" +
            $"• frames: `{snapshot.FrameCount}`\n" +
            $"• inserted silence frames: `{snapshot.InsertedSilenceFrames}`\n" +
            $"• captured bytes: `{snapshot.PcmBytes}`");
    }
    // ── !dave ──────────────────────────────────
    // Reports configuration-level DAVE (E2EE) status only — see the comment
    // below for why the live negotiation state is not shown.
    [Command("dave"), Description("Print DAVE encryption status.")]
    public async Task DaveAsync(CommandContext ctx)
    {
        var voice = ctx.Client.GetVoice();
        var conn = voice.GetConnection(ctx.Guild);
        if (conn is null)
        {
            await ctx.RespondAsync("Not connected.");
            return;
        }
        // VoiceConnection doesn't expose a public IsDaveActive property yet,
        // so we report configuration intent.
        await ctx.RespondAsync(
            "**DAVE status:**\n" +
            "• DAVE is enabled when `MaxDaveProtocolVersion = 1` (current config).\n" +
            "• Whether the server negotiated DAVE for this session is visible in the Debug logs " +
            "(look for `DAVE_VERSION` gateway opcode).\n" +
            "• If libdave was not found at startup, DAVE is automatically disabled and a " +
            "`LogError` was emitted.");
    }
// ── !ping ──────────────────────────────────
[Command("ping"), Description("Print voice gateway and UDP latency.")]
public async Task PingAsync(CommandContext ctx)
{
var voice = ctx.Client.GetVoice();
var conn = voice.GetConnection(ctx.Guild);
if (conn is null)
{
await ctx.RespondAsync("Not connected.");
return;
}
await ctx.RespondAsync(
$"🏓 **WebSocket ping:** {conn.WebSocketPing} ms\n" +
$"📡 **UDP ping:** {conn.UdpPing} ms");
}
// ── !vdebug [on|off|status] ────────────────
[Command("vdebug"), Description("Toggle or show voice debug logging for this active voice connection.")]
public async Task VoiceDebugAsync(
CommandContext ctx,
[Description("on/off/status (default status).")] string mode = "status")
{
var voice = ctx.Client.GetVoice();
var conn = voice.GetConnection(ctx.Guild);
if (conn is null)
{
await ctx.RespondAsync("Not connected.");
return;
}
if (string.Equals(mode, "status", StringComparison.OrdinalIgnoreCase))
{
await ctx.RespondAsync($"Voice debug logging is currently **{(conn.EnableDebugLogging ? "ON" : "OFF")}** for this connection.");
return;
}
if (string.Equals(mode, "on", StringComparison.OrdinalIgnoreCase))
{
conn.EnableDebugLogging = true;
await ctx.RespondAsync("✅ Voice debug logging enabled for this connection.");
return;
}
if (string.Equals(mode, "off", StringComparison.OrdinalIgnoreCase))
{
conn.EnableDebugLogging = false;
await ctx.RespondAsync("✅ Voice debug logging disabled for this connection.");
return;
}
await ctx.RespondAsync("Usage: `!vdebug on`, `!vdebug off`, or `!vdebug status`.");
}
    // ─────────────────────────────────────────
    // Event handlers
    // ─────────────────────────────────────────
    // Logs speaking-state transitions per SSRC.
    private static Task OnUserSpeaking(VoiceConnection conn, UserSpeakingEventArgs e)
    {
        Log.Logger.Debug(
            "[Voice] {User} is {State} (SSRC {Ssrc})",
            e.User?.Username ?? "unknown",
            e.Speaking is SpeakingFlags.NotSpeaking ? "silent" : "speaking",
            e.Ssrc);
        return Task.CompletedTask;
    }
    // Logs a user joining the voice channel.
    private static Task OnUserJoined(VoiceConnection conn, VoiceUserJoinEventArgs e)
    {
        Log.Logger.Information(
            "[Voice] {User} joined (SSRC {Ssrc})", e.User.Username, e.Ssrc);
        return Task.CompletedTask;
    }
    // Logs a user leaving the voice channel.
    private static Task OnUserLeft(VoiceConnection conn, VoiceUserLeaveEventArgs e)
    {
        Log.Logger.Information(
            "[Voice] {User} left (SSRC {Ssrc})", e.User.Username, e.Ssrc);
        return Task.CompletedTask;
    }
    // Logs voice WebSocket failures with the underlying exception.
    private static Task OnSocketError(VoiceConnection conn, SocketErrorEventArgs e)
    {
        Log.Logger.Error(e.Exception, "[Voice] WebSocket error");
        return Task.CompletedTask;
    }
    // Per-packet receive hook: always feeds the recorder, and — when !listen
    // is active — computes loudness/gap diagnostics and logs them.
    private static Task OnVoiceReceived(VoiceConnection conn, VoiceReceiveEventArgs e)
    {
        // Snapshot the recorder under the lock; TryWrite locks internally.
        ReceiveRecorder? recorder;
        lock (_recorderSync)
            recorder = _recorder;
        recorder?.TryWrite(e);
        if (!_listenActive)
            return Task.CompletedTask;
        var pcm = e.PcmData.Span;
        var sampleCount = pcm.Length / 2; // 16-bit little-endian samples
        long sumSquares = 0;
        var peak = 0;
        var nonTrivialSamples = 0;
        for (var i = 0; i + 1 < pcm.Length; i += 2)
        {
            var sample = BinaryPrimitives.ReadInt16LittleEndian(pcm.Slice(i, 2));
            var abs = Math.Abs((int)sample);
            if (abs > peak)
                peak = abs;
            // Ignore near-zero quantization noise for "activity" checks.
            if (abs > 64)
                nonTrivialSamples++;
            sumSquares += (long)sample * sample;
        }
        // Normalized RMS (0..1) and fraction of samples above the noise floor.
        var rms = sampleCount > 0
            ? Math.Sqrt(sumSquares / (double)sampleCount) / short.MaxValue
            : 0d;
        var activityRatio = sampleCount > 0
            ? nonTrivialSamples / (double)sampleCount
            : 0d;
        // A "usable" decoded frame should carry more than just tiny dithering/noise.
        var usable = sampleCount > 0 && (rms >= 0.0025 || activityRatio >= 0.01);
        var diag = _receiveDiagnostics.GetOrAdd(e.Ssrc, static _ => new());
        var now = DateTimeOffset.UtcNow;
        var missingFrames = 0;
        lock (diag.Sync)
        {
            if (diag.LastPacketAt is { } lastPacketAt)
            {
                // Wall-clock gap minus one expected frame duration; >= 1.5
                // frame durations is counted as loss. Math.Max guards the
                // division against a zero AudioDuration.
                var gapMs = (now - lastPacketAt).TotalMilliseconds - e.AudioDuration;
                if (gapMs >= e.AudioDuration * 1.5)
                    missingFrames = (int)Math.Floor(gapMs / Math.Max(1, e.AudioDuration));
            }
            diag.LastPacketAt = now;
            diag.TotalFrames++;
            if (usable)
                diag.UsableFrames++;
            if (missingFrames > 0)
                diag.MissingFrames += missingFrames;
        }
        Log.Logger.Debug(
            "[Receive] SSRC={Ssrc} User={User} usable={Usable} rms={Rms:0.0000} active={Active:0.0%} peak={Peak} dur={Duration}ms PCM={PcmBytes}B Opus={OpusBytes}B missingFrames={MissingFrames}",
            e.Ssrc,
            e.User?.Username ?? "unknown",
            usable,
            rms,
            activityRatio,
            peak,
            e.AudioDuration,
            e.PcmData.Length,
            e.OpusData.Length,
            missingFrames);
        // Emit a rollup line every 50 frames per SSRC.
        lock (diag.Sync)
        {
            if (diag.TotalFrames % 50 == 0)
            {
                var usableRatio = diag.TotalFrames > 0
                    ? diag.UsableFrames / (double)diag.TotalFrames
                    : 0d;
                Log.Logger.Information(
                    "[ReceiveSummary] SSRC={Ssrc} frames={Frames} usable={Usable:0.0%} missingFrames={MissingFrames}",
                    e.Ssrc, diag.TotalFrames, usableRatio, diag.MissingFrames);
            }
        }
        return Task.CompletedTask;
    }
// ─────────────────────────────────────────
// Helpers
// ─────────────────────────────────────────
private static void StopFfmpeg()
{
_playCts?.Cancel();
_playCts = null;
if (_ffmpeg is not null && !_ffmpeg.HasExited)
{
try { _ffmpeg.Kill(); } catch { /* already gone */ }
}
_ffmpeg = null;
}
private static RecordingSummary? StopRecorder()
{
ReceiveRecorder? recorder;
lock (_recorderSync)
{
recorder = _recorder;
_recorder = null;
}
if (recorder is null)
return null;
return recorder.Stop();
}
private static bool TryParseRecordingFormat(string value, out RecordingFormat format)
{
if (string.Equals(value, "wav", StringComparison.OrdinalIgnoreCase))
{
format = RecordingFormat.Wav;
return true;
}
if (string.Equals(value, "mp3", StringComparison.OrdinalIgnoreCase))
{
format = RecordingFormat.Mp3;
return true;
}
format = default;
return false;
}
    // Per-SSRC receive statistics used by OnVoiceReceived's !listen output.
    private sealed class ReceiveDiagnostics
    {
        public Lock Sync { get; } = new(); // guards all counters below
        public DateTimeOffset? LastPacketAt { get; set; }
        public int TotalFrames { get; set; }
        public int UsableFrames { get; set; } // frames above the noise heuristic
        public int MissingFrames { get; set; } // estimated from wall-clock gaps
    }
    // Output container requested via !recstart.
    private enum RecordingFormat
    {
        Wav,
        Mp3
    }
    // Point-in-time view of a running recording (!recstatus).
    private sealed class RecordingSnapshot
    {
        public required string TargetPath { get; init; }
        public required int ParticipantCount { get; init; }
        public required int FrameCount { get; init; }
        public required int InsertedSilenceFrames { get; init; }
        public required long PcmBytes { get; init; }
    }
    // Final result of a recording (!recstop).
    private sealed class RecordingSummary
    {
        public required string OutputPath { get; init; }
        public required string? StemDirectory { get; init; } // null when no tracks were captured
        public required int ParticipantCount { get; init; }
        public required int FrameCount { get; init; }
        public required int InsertedSilenceFrames { get; init; }
        public required long PcmBytes { get; init; }
    }
    // Buffers per-SSRC incoming PCM, then on Stop() mixes everything into one
    // WAV (optionally transcoded to MP3) plus per-participant "stem" WAVs.
    // All mutable state is guarded by _sync.
    private sealed class ReceiveRecorder
    {
        private readonly Lock _sync = new();
        private readonly string _wavPath; // mixdown WAV, always written first
        private readonly string _stemsDirectory; // "<base>_stems" for per-user WAVs
        private readonly RecordingFormat _format;
        private readonly FileStream _wavStream;
        private readonly Dictionary<uint, ParticipantTrack> _tracks = [];
        private readonly HashSet<uint> _participantSsrcs = [];
        private readonly DateTimeOffset _startedAt = DateTimeOffset.UtcNow;
        private bool _stopped;
        // Audio format/frame geometry is latched from the first received frame.
        private AudioFormat _audioFormat;
        private bool _audioFormatSet;
        private DateTimeOffset? _lastPacketAt;
        private int _frameDurationMs = 20; // default until the first packet arrives
        private int _frameByteLength;
        private byte[] _silenceFrameBuffer = []; // one zeroed frame, sized on first packet
        private int _frameCount;
        private int _insertedSilenceFrames;
        private long _pcmBytes;
        // One buffered PCM timeline per SSRC.
        private sealed class ParticipantTrack
        {
            public MemoryStream Pcm { get; set; } = new();
            public string Label { get; init; } = "unknown";
            public int StartOffsetFrames { get; set; } // silence frames owed before first packet
            public int FramesWritten { get; set; }
        }
        // Creates the mixdown WAV immediately with a placeholder header so
        // mixed PCM can be appended at Stop(). TargetPath reflects the
        // requested container; MP3 is only produced at Stop() via ffmpeg.
        public ReceiveRecorder(string basePath, RecordingFormat format)
        {
            _format = format;
            _wavPath = basePath + ".wav";
            _stemsDirectory = basePath + "_stems";
            this.TargetPath = format == RecordingFormat.Mp3 ? basePath + ".mp3" : _wavPath;
            _wavStream = new FileStream(_wavPath, FileMode.Create, FileAccess.ReadWrite, FileShare.Read);
            WriteWavHeaderPlaceholder(_wavStream);
        }
        // Intended output path (WAV, or MP3 when transcoding was requested).
        public string TargetPath { get; }
        // Appends one received frame to the sender's track. The first frame
        // latches the audio format and frame size; later frames that don't
        // match are dropped with a warning. No-op after Stop().
        public void TryWrite(VoiceReceiveEventArgs e)
        {
            lock (_sync)
            {
                if (_stopped || e.PcmData.Length <= 0)
                    return;
                var now = DateTimeOffset.UtcNow;
                _participantSsrcs.Add(e.Ssrc);
                if (!_audioFormatSet)
                {
                    // First frame wins: it defines format, frame duration,
                    // frame byte length, and the zeroed silence buffer.
                    _audioFormat = e.AudioFormat;
                    _audioFormatSet = true;
                    _frameDurationMs = Math.Max(1, e.AudioDuration);
                    _frameByteLength = e.PcmData.Length;
                    _silenceFrameBuffer = new byte[e.PcmData.Length];
                    Log.Logger.Information(
                        "[Recorder] Multi-user track capture initialized from SSRC={Ssrc} User={User} Format={Rate}Hz/{Channels}ch frame={Duration}ms",
                        e.Ssrc,
                        e.User?.Username ?? "unknown",
                        e.AudioFormat.SampleRate,
                        e.AudioFormat.ChannelCount,
                        _frameDurationMs);
                }
                // _audioFormatSet is always true by this point; the extra
                // check is redundant but harmless.
                if (_audioFormatSet &&
                    (_audioFormat.SampleRate != e.AudioFormat.SampleRate || _audioFormat.ChannelCount != e.AudioFormat.ChannelCount))
                {
                    Log.Logger.Warning(
                        "[Recorder] Ignoring frame with format mismatch for SSRC={Ssrc} User={User}: got {Rate}Hz/{Channels}ch, expected {ExpectedRate}Hz/{ExpectedChannels}ch",
                        e.Ssrc,
                        e.User?.Username ?? "unknown",
                        e.AudioFormat.SampleRate,
                        e.AudioFormat.ChannelCount,
                        _audioFormat.SampleRate,
                        _audioFormat.ChannelCount);
                    return;
                }
                if (e.PcmData.Length != _frameByteLength)
                {
                    Log.Logger.Warning(
                        "[Recorder] Ignoring frame with size mismatch for SSRC={Ssrc} User={User}: got {Bytes} bytes, expected {ExpectedBytes} bytes",
                        e.Ssrc,
                        e.User?.Username ?? "unknown",
                        e.PcmData.Length,
                        _frameByteLength);
                    return;
                }
                if (!_tracks.TryGetValue(e.Ssrc, out var track))
                {
                    // New participant: offset their track by the silence
                    // elapsed since the recorder started so timelines align.
                    track = new ParticipantTrack
                    {
                        Label = SanitizeFileName(e.User?.Username ?? $"ssrc_{e.Ssrc}"),
                        StartOffsetFrames = GetLeadingFrames(now)
                    };
                    _tracks[e.Ssrc] = track;
                }
                track.Pcm.Write(e.PcmData.Span);
                track.FramesWritten++;
                _lastPacketAt = now;
            }
        }
        // Thread-safe point-in-time statistics for !recstatus.
        public RecordingSnapshot GetSnapshot()
        {
            lock (_sync)
            {
                return new RecordingSnapshot
                {
                    TargetPath = this.TargetPath,
                    ParticipantCount = _participantSsrcs.Count,
                    FrameCount = GetCurrentFrameCount(),
                    InsertedSilenceFrames = _insertedSilenceFrames,
                    PcmBytes = _pcmBytes
                };
            }
        }
        // Finalizes the recording: exports stems, aligns tracks, mixes into
        // the WAV, patches the header, and optionally transcodes to MP3.
        // Idempotent — a repeat call returns a summary without re-finalizing.
        public RecordingSummary Stop()
        {
            lock (_sync)
            {
                if (_stopped)
                {
                    // NOTE(review): if the first Stop() fell back to WAV after
                    // a failed transcode, this replay still reports the MP3
                    // TargetPath — confirm acceptable for a test bot.
                    return new RecordingSummary
                    {
                        OutputPath = this.TargetPath,
                        StemDirectory = _tracks.Count > 0 ? _stemsDirectory : null,
                        ParticipantCount = _participantSsrcs.Count,
                        FrameCount = _frameCount,
                        InsertedSilenceFrames = _insertedSilenceFrames,
                        PcmBytes = _pcmBytes
                    };
                }
                _stopped = true;
                if (_audioFormatSet)
                {
                    var finalFrameCount = GetCurrentFrameCount();
                    // Extend the timeline with trailing silence if recording
                    // continued past the last received packet.
                    if (_lastPacketAt is { } lastPacketAt)
                    {
                        var trailingMs = (DateTimeOffset.UtcNow - lastPacketAt).TotalMilliseconds;
                        if (trailingMs >= _frameDurationMs * 1.5)
                            finalFrameCount += (int)Math.Floor(trailingMs / _frameDurationMs);
                    }
                    // Stems are exported before AlignTracks pads them, so they
                    // contain only the audio actually received per participant.
                    ExportStemFiles();
                    AlignTracks(finalFrameCount);
                    MixTracksToWav(finalFrameCount);
                }
                FinalizeWavFile(_wavStream, _pcmBytes);
                _wavStream.Dispose();
                var outputPath = this.TargetPath;
                if (_format == RecordingFormat.Mp3)
                {
                    // Fall back to the WAV when ffmpeg transcoding fails.
                    if (!TranscodeToMp3(_wavPath, outputPath))
                    {
                        outputPath = _wavPath;
                        Log.Logger.Warning("[Recorder] MP3 transcoding failed. Keeping WAV output at {Path}", outputPath);
                    }
                }
                var durationSec = (DateTimeOffset.UtcNow - _startedAt).TotalSeconds;
                Log.Logger.Information(
                    "[Recorder] Finalized {Path} ({Participants} participants, {Frames} frames, {SilenceFrames} inserted silence frames, {Bytes} PCM bytes, {Duration:0.0}s)",
                    outputPath, _participantSsrcs.Count, _frameCount, _insertedSilenceFrames, _pcmBytes, durationSec);
                return new RecordingSummary
                {
                    OutputPath = outputPath,
                    StemDirectory = _tracks.Count > 0 ? _stemsDirectory : null,
                    ParticipantCount = _participantSsrcs.Count,
                    FrameCount = _frameCount,
                    InsertedSilenceFrames = _insertedSilenceFrames,
                    PcmBytes = _pcmBytes
                };
            }
        }
private int GetLeadingFrames(DateTimeOffset firstPacketAt)
{
var leadingMs = (firstPacketAt - _startedAt).TotalMilliseconds - _frameDurationMs;
if (leadingMs < _frameDurationMs * 1.5)
return 0;
return (int)Math.Floor(leadingMs / _frameDurationMs);
}
private int GetCurrentFrameCount()
{
if (_tracks.Count == 0)
return 0;
return _tracks.Values.Max(track => track.StartOffsetFrames + track.FramesWritten);
}
        // Pads every track to exactly frameCount frames: leading silence for
        // late joiners (StartOffsetFrames) plus trailing silence up to the
        // common timeline. Afterwards all tracks are the same byte length.
        private void AlignTracks(int frameCount)
        {
            foreach (var track in _tracks.Values)
            {
                if (track.StartOffsetFrames > 0)
                {
                    // Rebuild the stream with leading silence prepended.
                    var aligned = new MemoryStream();
                    WriteSilenceFrames(aligned, track.StartOffsetFrames);
                    track.Pcm.Position = 0;
                    track.Pcm.CopyTo(aligned);
                    track.Pcm.Dispose();
                    track.Pcm = aligned;
                    track.FramesWritten += track.StartOffsetFrames;
                    _insertedSilenceFrames += track.StartOffsetFrames;
                    track.StartOffsetFrames = 0;
                }
                var missing = frameCount - track.FramesWritten;
                if (missing <= 0)
                    continue;
                WriteSilenceFrames(track.Pcm, missing);
                track.FramesWritten += missing;
                _insertedSilenceFrames += missing;
            }
        }
        // Mixes all aligned tracks frame-by-frame into the mixdown stream.
        // Each sample is averaged over the tracks "active" (above the noise
        // floor) at that sample, scaled slightly for headroom, then clamped.
        private void MixTracksToWav(int frameCount)
        {
            if (_tracks.Count == 0 || _frameByteLength <= 0)
                return;
            var sampleCount = _frameByteLength / sizeof(short);
            var mixedSamples = new int[sampleCount]; // per-sample sum across tracks
            var activeContributors = new int[sampleCount]; // per-sample count of loud tracks
            var mixedFrame = new byte[_frameByteLength];
            var readers = new (MemoryStream Stream, byte[] Buffer)[_tracks.Count];
            var idx = 0;
            foreach (var track in _tracks.Values)
            {
                track.Pcm.Position = 0;
                readers[idx++] = (track.Pcm, new byte[_frameByteLength]);
            }
            for (var frame = 0; frame < frameCount; frame++)
            {
                Array.Clear(mixedSamples, 0, mixedSamples.Length);
                Array.Clear(activeContributors, 0, activeContributors.Length);
                for (var r = 0; r < readers.Length; r++)
                {
                    var reader = readers[r];
                    var read = reader.Stream.Read(reader.Buffer, 0, reader.Buffer.Length);
                    if (read <= 0)
                        continue;
                    // Zero-fill a short tail read so stale bytes don't leak in.
                    if (read < reader.Buffer.Length)
                        Array.Clear(reader.Buffer, read, reader.Buffer.Length - read);
                    var span = reader.Buffer.AsSpan();
                    for (var i = 0; i < sampleCount; i++)
                    {
                        var sample = BinaryPrimitives.ReadInt16LittleEndian(span.Slice(i * sizeof(short), sizeof(short)));
                        mixedSamples[i] += sample;
                        // Same |sample| > 64 noise floor as the !listen heuristics.
                        if (Math.Abs(sample) > 64)
                            activeContributors[i]++;
                    }
                }
                var output = mixedFrame.AsSpan();
                for (var i = 0; i < sampleCount; i++)
                {
                    // Average over active speakers only (min 1 avoids division
                    // by zero), leave 2% headroom, clamp to 16-bit range.
                    var contributors = Math.Max(1, activeContributors[i]);
                    var mixed = (mixedSamples[i] / (double)contributors) * 0.98;
                    var clamped = Math.Clamp((int)Math.Round(mixed), short.MinValue, short.MaxValue);
                    BinaryPrimitives.WriteInt16LittleEndian(output.Slice(i * sizeof(short), sizeof(short)), (short)clamped);
                }
                _wavStream.Write(mixedFrame);
                _pcmBytes += mixedFrame.Length;
                _frameCount++;
            }
        }
private void ExportStemFiles()
{
Directory.CreateDirectory(_stemsDirectory);
foreach (var (ssrc, track) in _tracks)
{
var stemPath = Path.Combine(_stemsDirectory, $"{ssrc}_{track.Label}.wav");
using var stemStream = new FileStream(stemPath, FileMode.Create, FileAccess.ReadWrite, FileShare.Read);
WriteWavHeaderPlaceholder(stemStream);
track.Pcm.Position = 0;
track.Pcm.CopyTo(stemStream);
FinalizeWavFile(stemStream, track.Pcm.Length);
}
}
private void WriteSilenceFrames(Stream stream, int frameCount)
{
if (frameCount <= 0 || _silenceFrameBuffer.Length == 0)
return;
for (var i = 0; i < frameCount; i++)
stream.Write(_silenceFrameBuffer);
}
private static bool TranscodeToMp3(string wavPath, string mp3Path)
{
try
{
using var ffmpeg = Process.Start(new ProcessStartInfo
{
FileName = "ffmpeg",
Arguments = $"-y -hide_banner -loglevel error -i \"{wavPath}\" -codec:a libmp3lame -q:a 2 \"{mp3Path}\"",
RedirectStandardError = true,
UseShellExecute = false
});
if (ffmpeg is null)
return false;
var stderr = ffmpeg.StandardError.ReadToEnd();
ffmpeg.WaitForExit();
if (ffmpeg.ExitCode != 0)
{
Log.Logger.Error("[Recorder] ffmpeg failed with exit code {ExitCode}: {Stderr}", ffmpeg.ExitCode, stderr);
return false;
}
if (File.Exists(mp3Path))
File.Delete(wavPath);
return true;
}
catch (Exception ex)
{
Log.Logger.Error(ex, "[Recorder] Exception while transcoding WAV to MP3");
return false;
}
}
private static string SanitizeFileName(string value)
{
var chars = value.ToCharArray();
for (var i = 0; i < chars.Length; i++)
{
if (Array.IndexOf(Path.GetInvalidFileNameChars(), chars[i]) >= 0)
chars[i] = '_';
}
var sanitized = new string(chars).Trim();
return string.IsNullOrWhiteSpace(sanitized) ? "unknown" : sanitized;
}
private static void WriteWavHeaderPlaceholder(Stream stream)
{
Span<byte> header = stackalloc byte[44];
header.Clear();
"RIFF"u8.CopyTo(header.Slice(0, 4));
"WAVE"u8.CopyTo(header.Slice(8, 4));
"fmt "u8.CopyTo(header.Slice(12, 4));
BinaryPrimitives.WriteInt32LittleEndian(header.Slice(16, 4), 16);
BinaryPrimitives.WriteInt16LittleEndian(header.Slice(20, 2), 1);
BinaryPrimitives.WriteInt16LittleEndian(header.Slice(34, 2), 16);
"data"u8.CopyTo(header.Slice(36, 4));
stream.Write(header);
}
        // Rewrites the 44-byte PCM WAV header with the real sizes and the
        // latched audio format, then flushes. Used for the mixdown and stems.
        private void FinalizeWavFile(Stream stream, long pcmBytes)
        {
            // If nothing was ever received, fall back to the library default.
            if (!_audioFormatSet)
                _audioFormat = AudioFormat.Default;
            Span<byte> header = stackalloc byte[44];
            header.Clear();
            "RIFF"u8.CopyTo(header.Slice(0, 4));
            // NOTE(review): the (int) casts truncate past ~2 GB of PCM — the
            // 32-bit RIFF format caps there anyway; confirm acceptable.
            BinaryPrimitives.WriteInt32LittleEndian(header.Slice(4, 4), (int)(36 + pcmBytes));
            "WAVE"u8.CopyTo(header.Slice(8, 4));
            "fmt "u8.CopyTo(header.Slice(12, 4));
            BinaryPrimitives.WriteInt32LittleEndian(header.Slice(16, 4), 16); // fmt chunk size
            BinaryPrimitives.WriteInt16LittleEndian(header.Slice(20, 2), 1); // PCM
            BinaryPrimitives.WriteInt16LittleEndian(header.Slice(22, 2), (short)_audioFormat.ChannelCount);
            BinaryPrimitives.WriteInt32LittleEndian(header.Slice(24, 4), _audioFormat.SampleRate);
            var byteRate = _audioFormat.SampleRate * _audioFormat.ChannelCount * sizeof(short);
            BinaryPrimitives.WriteInt32LittleEndian(header.Slice(28, 4), byteRate);
            BinaryPrimitives.WriteInt16LittleEndian(header.Slice(32, 2), (short)(_audioFormat.ChannelCount * sizeof(short))); // block align
            BinaryPrimitives.WriteInt16LittleEndian(header.Slice(34, 2), 16); // bits per sample
            "data"u8.CopyTo(header.Slice(36, 4));
            BinaryPrimitives.WriteInt32LittleEndian(header.Slice(40, 4), (int)pcmBytes);
            stream.Seek(0, SeekOrigin.Begin);
            stream.Write(header);
            stream.Flush();
        }
}
}
<Project Sdk="Microsoft.NET.Sdk">
  <!-- Console host for VoiceTestBot.cs (net10.0, nullable + implicit usings). -->
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <!-- NuGet vulnerability auditing disabled — acceptable for a throwaway test bot only. -->
    <NuGetAuditSuppress>true</NuGetAuditSuppress>
    <NuGetAudit>false</NuGetAudit>
  </PropertyGroup>
  <ItemGroup>
    <!-- NOTE(review): the source header lists DisCatSharp.Voice.Natives as a
         required package, but it is not referenced here — confirm whether
         DisCatSharp.Voice pulls it in transitively. -->
    <PackageReference Include="DisCatSharp" Version="10.7.0-nightly-069" />
    <PackageReference Include="DisCatSharp.CommandsNext" Version="10.7.0-nightly-069" />
    <PackageReference Include="DisCatSharp.Voice" Version="10.7.0-nightly-069" />
    <PackageReference Include="Microsoft.Extensions.DependencyInjection" Version="10.0.3" />
    <PackageReference Include="Microsoft.Extensions.Logging" Version="10.0.3" />
    <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.3" />
    <PackageReference Include="Serilog" Version="4.3.1" />
    <PackageReference Include="Serilog.Extensions.Logging" Version="10.0.0" />
    <PackageReference Include="Serilog.Sinks.Console" Version="6.1.1" />
    <PackageReference Include="Serilog.Sinks.Debug" Version="3.0.0" />
    <PackageReference Include="Serilog.Sinks.File" Version="7.0.0" />
    <PackageReference Include="System.Memory" Version="4.6.3" />
  </ItemGroup>
</Project>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment