Last active
December 9, 2023 00:16
-
-
Save shiguruikai/1b549ebbc7c902628f3cbaa3a305073f to your computer and use it in GitHub Desktop.
rvcやsvc用の学習データを作成するためのスクリプト。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Requires -Version 7.1

<#
.SYNOPSIS
Builds training data for RVC / SVC from a directory of audio files.

.DESCRIPTION
Every file directly inside SrcDir is converted with ffmpeg into a mono WAV
file in DstDir. Later steps of the script normalize loudness, split on
silence, strip silence, drop or concatenate short clips and finally rename the
results to sequential numbers. ffmpeg and ffprobe must be available on PATH.
#>
[CmdletBinding()]
param (
    # Directory whose files (non-recursive) are used as input.
    [Parameter(Mandatory)]
    [string]
    $SrcDir,

    # Output directory; created when it does not exist.
    [Parameter(Mandatory)]
    [string]
    $DstDir,

    # Output sampling rate in Hz (ffmpeg -ar).
    [Parameter()]
    [int]
    $SamplingRate = 44100,

    # Which channel(s) feed the mono downmix. "rigth" is the historical
    # misspelling of "right" and is still accepted for backward compatibility.
    [Parameter()]
    [ValidateSet("left", "right", "rigth", "mix")]
    [string]
    $MonoChannel = "left",

    # High-pass filter cutoff frequency in Hz (highpass f=).
    [Parameter()]
    [double]
    $HPF = 80,

    # Passed to the highpass filter as its n (normalize) option.
    [Parameter()]
    [boolean]
    $NormalizeDC = $true,

    # Denoising strength of the anlmdn filter (s=).
    # https://ffmpeg.org/ffmpeg-filters.html#anlmdn
    [Parameter()]
    [ValidateRange(0.00001, 10000)]
    [double]
    $NoiseReductionLevel = 0.01,

    # loudnorm integrated loudness target (I=).
    [Parameter()]
    [int]
    $LoudnormI = -14,

    # loudnorm true peak target (TP=).
    [Parameter()]
    [int]
    $LoudnormTP = -2,

    # loudnorm loudness range target (LRA=).
    [Parameter()]
    [int]
    $LoudnormLRA = 11,

    # Silence threshold in dB. It is interpolated as "<value>dB" into the
    # silencedetect / silenceremove filters, so it has to be zero or negative.
    # The range used to be 0..97, which rejected the script's own default.
    [Parameter()]
    [ValidateRange(-97, 0)]
    [int]
    $SilenceThreshold = -40,

    # Minimum length in seconds for a quiet passage to count as silence.
    [Parameter()]
    [double]
    $MinSilenceDuration = 0.2,

    # Minimum length in seconds of every segment produced by the split step.
    [Parameter()]
    [double]
    $MinSplitDuration = 2.0,

    # Files whose duration is at most this many seconds are deleted.
    [Parameter()]
    [double]
    $SkipTotalDuration = 0.4,

    # Files no longer than this many seconds are concatenated with their
    # neighbours until the combined duration reaches this value.
    [Parameter()]
    [double]
    $ConcatDuration = 2,

    # Codec of the final output files.
    [Parameter()]
    [ValidateSet("pcm_s16le", "pcm_s24le", "pcm_s32le", "pcm_f32le", "pcm_f64le")]
    [string]
    $OutputCodec = "pcm_s16le",

    # Codec of the intermediate files written by the first two steps.
    [Parameter()]
    [ValidateSet("pcm_s16le", "pcm_s24le", "pcm_s32le", "pcm_f32le", "pcm_f64le")]
    [string]
    $TempCodec = "pcm_f32le",

    # ffmpeg log level (-v).
    [Parameter()]
    [ValidateSet("quiet", "panic", "fatal", "error", "warning", "info", "verbose", "debug", "trace")]
    [string]
    $FFmpegLogLevel = "error"
)

$ErrorActionPreference = 'Stop'

$SrcDir = Resolve-Path $SrcDir
# New-Item instead of mkdir: mkdir is only a PowerShell function on Windows.
$DstDir = (New-Item -ItemType Directory -Path $DstDir -Force).FullName

if (Get-ChildItem $DstDir) {
    Write-Host "[warn] File exists in destination directory." -ForegroundColor Yellow
}

# Number of logical cores. Environment.ProcessorCount always yields a single
# integer and works on every platform, unlike the Win32_Processor CIM class,
# which returns one value per CPU socket.
$ncpus = [Environment]::ProcessorCount

# Number of parallel workers: 80% of the logical cores, at least one.
$parallelLimit = [Math]::Max(1, [int]($ncpus * 0.8))

# Step 1. Downmix to mono, high-pass filter and noise reduction
$i = 0
$inputFiles = Get-ChildItem $SrcDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    # Script variables are not visible inside -Parallel; copy them in.
    $DstDir = $using:DstDir
    $SamplingRate = $using:SamplingRate
    $MonoChannel = $using:MonoChannel
    $HPF = $using:HPF
    $NormalizeDC = $using:NormalizeDC
    $NoiseReductionLevel = $using:NoiseReductionLevel
    $TempCodec = $using:TempCodec
    $FFmpegLogLevel = $using:FFmpegLogLevel

    $inputFile = $_
    $outputFilePath = Join-Path $DstDir "$($inputFile.BaseName).wav"

    $panFilter = if ($MonoChannel -eq 'left') {
        'pan=mono|FC=FC+FL'
    }
    elseif ($MonoChannel -in 'right', 'rigth') {
        'pan=mono|FC=FC+FR'
    }
    else {
        # The pan filter expects "gain*channel"; "FL*0.5" is a syntax error.
        'pan=mono|FC=FC+0.5*FL+0.5*FR'
    }

    ffmpeg `
        -v $FFmpegLogLevel `
        -i $inputFile.FullName `
        -af "$($panFilter),highpass=f=$($HPF):n=$([int]$NormalizeDC),anlmdn=s=$($NoiseReductionLevel)" `
        -ac 1 -ar $SamplingRate -acodec $TempCodec `
        $outputFilePath `
        -y 2>&1 | Write-Host

    # Emit the processed item so the downstream block can count progress.
    return $inputFile
} | ForEach-Object {
    $i++
    Write-Progress "Step 1. Mono & HPF & Noise Reduction" "$i / $($inputFiles.Count)" -PercentComplete ($i / $inputFiles.Count * 100)
}
# Step 2. Loudness normalization (two-pass loudnorm)
$i = 0
$inputFiles = Get-ChildItem $DstDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    # Script variables are not visible inside -Parallel; copy them in.
    $DstDir = $using:DstDir
    $SamplingRate = $using:SamplingRate
    $I = $using:LoudnormI
    $TP = $using:LoudnormTP
    $LRA = $using:LoudnormLRA
    $FFmpegLogLevel = $using:FFmpegLogLevel
    $TempCodec = $using:TempCodec

    $inputFile = $_

    # Pass 1: run loudnorm in measurement mode, pull the JSON report out of
    # ffmpeg's log output and turn it into the filter string for pass 2.
    $loudnormFilter = ffmpeg `
        -i $inputFile.FullName `
        -af "loudnorm=I=$($I):TP=$($TP):LRA=$($LRA):print_format=json" `
        -f null - 2>&1 `
    | Out-String `
    | Select-String "\[Parsed_loudnorm.*?\]\s*(\{[\s\S]*?\})" `
    | ForEach-Object { $_.Matches.Groups[1].Value } `
    | ConvertFrom-Json `
    | ForEach-Object {
        "loudnorm=I=$($I):TP=$($TP):LRA=$($LRA)" `
            + ":measured_I=$($_.input_i):measured_TP=$($_.input_tp):measured_LRA=$($_.input_lra)" `
            + ":measured_thresh=$($_.input_thresh):offset=$($_.target_offset)"
    }

    # Without measurements pass 2 cannot run; keep the file as it is instead
    # of deleting it further down.
    if (-not $loudnormFilter) {
        Write-Warning "Loudness measurement failed, file left unchanged: $($inputFile.FullName)"
        return $inputFile
    }

    # Pass 2: apply the normalization into a temporary file.
    $outputFilePath = Join-Path $DstDir "$($inputFile.BaseName).temp.wav"

    ffmpeg `
        -v $FFmpegLogLevel `
        -i $inputFile.FullName `
        -af $loudnormFilter `
        -ac 1 -ar $SamplingRate -acodec $TempCodec `
        $outputFilePath `
        -y 2>&1 | Write-Host

    # Only replace the input when ffmpeg really produced the output.
    if ($LASTEXITCODE -ne 0 -or -not (Test-Path -LiteralPath $outputFilePath)) {
        Write-Warning "Loudness normalization failed, file left unchanged: $($inputFile.FullName)"
        Remove-Item -LiteralPath $outputFilePath -ErrorAction SilentlyContinue
        return $inputFile
    }

    # -LiteralPath: file names containing [ or ] must not be read as wildcards.
    Remove-Item -LiteralPath $inputFile.FullName
    Rename-Item -LiteralPath $outputFilePath -NewName "$($inputFile.BaseName).wav"

    # Emit the processed item so the downstream block can count progress.
    return $inputFile
} | ForEach-Object {
    $i++
    Write-Progress "Step 2. Loudness Normalization" "$i / $($inputFiles.Count)" -PercentComplete ($i / $inputFiles.Count * 100)
}
# Step 3. Split at silent passages
$i = 0
$inputFiles = Get-ChildItem $DstDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    # Script variables are not visible inside -Parallel; copy them in.
    $DstDir = $using:DstDir
    $SilenceThreshold = $using:SilenceThreshold
    $MinSilenceDuration = $using:MinSilenceDuration
    $MinSplitDuration = $using:MinSplitDuration
    $FFmpegLogLevel = $using:FFmpegLogLevel

    $inputFile = $_

    $audioInfo = ffprobe `
        -v error `
        -i $inputFile.FullName `
        -print_format json -show_entries format=duration 2>&1 `
    | ConvertFrom-Json

    $totalDuration = [double]$audioInfo.format.duration

    # Files shorter than the minimum segment length are not split.
    if ($totalDuration -lt $MinSplitDuration) {
        return $inputFile
    }

    # Candidate cut points: the middle of every detected silent passage.
    # The log line is split on spaces; field 4 is the silence_end time and
    # field 7 the silence_duration.
    [double[]]$silenceTimes = ffmpeg `
        -i $inputFile.FullName `
        -af "silencedetect=n=$($SilenceThreshold)dB:d=$($MinSilenceDuration)" `
        -f null - 2>&1 `
    | Select-String "silence_end" `
    | ForEach-Object {
        $array = $_ -split ' '
        $silence_end = [double]$array[4]
        $silence_duration = [double]$array[7]
        return $silence_end - ($silence_duration / 2)
    }

    # Keep a cut point only when both the segment before it and the remainder
    # after it are at least $MinSplitDuration long.
    [double[]]$segmentTimes = @()
    $lastCut = 0.0
    foreach ($st in $silenceTimes) {
        if (($st - $lastCut) -ge $MinSplitDuration -and ($totalDuration - $st) -ge $MinSplitDuration) {
            $segmentTimes += $st
            $lastCut = $st
        }
    }

    if (-not $segmentTimes.Count) {
        return $inputFile
    }

    # The segment muxer expands %04d in the output name, so a literal % in the
    # base name has to be written as %%.
    $patternBase = $inputFile.BaseName.Replace('%', '%%')
    $outputFilePath = Join-Path $DstDir "$($patternBase)_%04d$($inputFile.Extension)"

    ffmpeg `
        -v $FFmpegLogLevel `
        -i $inputFile.FullName `
        -f segment -segment_times ($segmentTimes -join ',') `
        -reset_timestamps 1 `
        -map 0:a `
        -c:a copy $outputFilePath `
        -y 2>&1 | Write-Host

    # Delete the unsplit original only when the segments were written.
    # -LiteralPath keeps [ and ] in file names from being read as wildcards.
    if ($LASTEXITCODE -eq 0) {
        Remove-Item -LiteralPath $inputFile.FullName
    }
    else {
        Write-Warning "Splitting failed, file left unsplit: $($inputFile.FullName)"
    }

    # Emit the processed item so the downstream block can count progress.
    return $inputFile
} | ForEach-Object {
    $i++
    Write-Progress "Step 3. Split By Silence" "$i / $($inputFiles.Count)" -PercentComplete ($i / $inputFiles.Count * 100)
}
# Step 4. Remove silence
$i = 0
$inputFiles = Get-ChildItem $DstDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    # Script variables are not visible inside -Parallel; copy them in.
    $DstDir = $using:DstDir
    $SamplingRate = $using:SamplingRate
    $SilenceThreshold = $using:SilenceThreshold
    $MinSilenceDuration = $using:MinSilenceDuration
    $FFmpegLogLevel = $using:FFmpegLogLevel
    $OutputCodec = $using:OutputCodec

    $inputFile = $_

    # Trims silence at the start of the stream; used twice (see below).
    $removeStart = "silenceremove=start_periods=1:start_silence=0.02:start_threshold=$($SilenceThreshold)dB:detection=rms:window=0.01"

    $filters = @(
        # Remove all digital silence.
        "silenceremove=window=0:detection=peak:stop_mode=all:start_mode=all:stop_periods=-1:stop_threshold=0",
        # Remove leading silence.
        $removeStart,
        # Remove trailing silence (reverse -> trim the start -> reverse back).
        "areverse", $removeStart, "areverse",
        # Remove every silent passage below the threshold that lasts long enough.
        "silenceremove=stop_periods=-1:stop_duration=$($MinSilenceDuration):stop_threshold=$($SilenceThreshold)dB:detection=rms:window=0.01"
    )

    $outputFilePath = Join-Path $DstDir "$($inputFile.BaseName).temp.wav"

    ffmpeg `
        -v $FFmpegLogLevel `
        -i $inputFile.FullName `
        -af ($filters -join ',') `
        -ac 1 -ar $SamplingRate -acodec $OutputCodec `
        $outputFilePath `
        -y 2>&1 | Write-Host

    # Only replace the input when ffmpeg really produced the output.
    if ($LASTEXITCODE -ne 0 -or -not (Test-Path -LiteralPath $outputFilePath)) {
        Write-Warning "Silence removal failed, file left unchanged: $($inputFile.FullName)"
        Remove-Item -LiteralPath $outputFilePath -ErrorAction SilentlyContinue
        return $inputFile
    }

    # -LiteralPath: file names containing [ or ] must not be read as wildcards.
    Remove-Item -LiteralPath $inputFile.FullName
    Rename-Item -LiteralPath $outputFilePath -NewName "$($inputFile.BaseName).wav"

    # Emit the processed item so the downstream block can count progress.
    return $inputFile
} | ForEach-Object {
    $i++
    Write-Progress "Step 4. Remove Silence" "$i / $($inputFiles.Count)" -PercentComplete ($i / $inputFiles.Count * 100)
}
# Step 5. Measure the duration of every file
$audioInfoList = [System.Collections.Generic.List[psobject]]::new()
$i = 0
$inputFiles = Get-ChildItem $DstDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    $inputFile = $_

    $audioInfo = ffprobe `
        -v error `
        -i $inputFile.FullName `
        -print_format json -show_entries format=duration 2>&1 `
    | ConvertFrom-Json

    return [PSCustomObject]@{
        FullName = $inputFile.FullName
        BaseName = $inputFile.BaseName
        Duration = [double]$audioInfo.format.duration
    }
} | ForEach-Object {
    # Add() keeps the List; "+=" would replace it with a plain array that is
    # re-copied for every single item.
    $audioInfoList.Add($_)
    $i++
    Write-Progress "Step 5. Check audio duration" "$i / $($inputFiles.Count)" -PercentComplete ($i / $inputFiles.Count * 100)
}

# Results arrive in completion order; sort them so that neighbouring segments
# of the same source file stay adjacent.
$audioInfoList = @($audioInfoList | Sort-Object FullName)

# @() guarantees arrays even for zero or one match, so .Count and indexing
# behave the same in every case.
# Files to delete: at most $SkipTotalDuration seconds long.
$removeTargets = @($audioInfoList | Where-Object { $_.Duration -le $SkipTotalDuration })
# Files to concatenate: longer than that, but at most $ConcatDuration seconds.
$concatTargets = @($audioInfoList | Where-Object { $_.Duration -gt $SkipTotalDuration -and $_.Duration -le $ConcatDuration })
# Step 6. Delete very short files
$i = 0
$removeTargets | ForEach-Object {
    $i++
    # -LiteralPath: with -Path, names containing [ or ] are treated as wildcard
    # patterns, match nothing and the file silently stays behind.
    Remove-Item -LiteralPath $_.FullName
    Write-Progress "Step 6. Remove very short files" "$i / $($removeTargets.Count)" -PercentComplete ($i / $removeTargets.Count * 100)
}
# Step 7. Concatenate short files
try {
    # Create a 0.02 s silent spacer that is inserted between joined files.
    # It must use the same codec as the files it is joined with because the
    # concatenation below copies the streams without re-encoding; without
    # -acodec the WAV muxer would always write pcm_s16le.
    $silentFilePath = Join-Path $DstDir "silent.temp.wav"
    ffmpeg `
        -v $FFmpegLogLevel `
        -f lavfi `
        -i "anullsrc=cl=mono:r=$($SamplingRate)" `
        -t 0.02 `
        -acodec $OutputCodec `
        $silentFilePath `
        -y 2>&1 | Write-Host

    # Inside a single-quoted concat-list entry a quote is written as '\''.
    $silentEntry = "file '$($silentFilePath.Replace("'", "'\''"))'"

    $filePaths = [System.Collections.Generic.List[string]]::new()
    $i = 0
    while ($true) {
        # Collect consecutive files until their combined duration reaches
        # $ConcatDuration or the candidates run out.
        $filePaths.Clear()
        $totalDuration = 0.0
        for (; $totalDuration -lt $ConcatDuration -and $i -lt $concatTargets.Count; $i++) {
            $t = $concatTargets[$i]
            $filePaths.Add($t.FullName)
            $totalDuration += $t.Duration
        }

        if (-not $filePaths.Count) {
            break
        }

        $outputFilePath = Join-Path $DstDir "$(Split-Path $filePaths[0] -LeafBase)_$($i).concat.wav"

        # Concat list: the collected files with the silent spacer in between.
        $fileList = ($filePaths | ForEach-Object { "file '$($_.Replace("'", "'\''"))'" }) -join "`n$silentEntry`n"

        $fileList | ffmpeg `
            -v $FFmpegLogLevel `
            -protocol_whitelist 'pipe,file' `
            -safe 0 `
            -f concat `
            -i pipe:0 `
            -c:a copy $outputFilePath `
            -y 2>&1 | Write-Host

        if ($LASTEXITCODE -eq 0) {
            # -LiteralPath keeps [ and ] in file names from being read as
            # wildcards.
            Remove-Item -LiteralPath $filePaths.ToArray()
        }
        else {
            # Keep the source files when the concatenation failed.
            Write-Warning "Concatenation failed, source files kept: $outputFilePath"
            Remove-Item -LiteralPath $outputFilePath -ErrorAction SilentlyContinue
        }

        Write-Progress "Step 7. Concat short files" "$i / $($concatTargets.Count)" -PercentComplete ($i / $concatTargets.Count * 100)
    }
}
finally {
    # Delete the silent spacer file again.
    Remove-Item $silentFilePath -ErrorAction SilentlyContinue
}
# Step 8. Rename the results to sequential numbers
# Pass (a): give every file a ".temp" infix first, so that the numeric names
# assigned in pass (b) cannot clash with a file that is still waiting for its
# new name.
$dstFiles = Get-ChildItem $DstDir -File
$i = 0
foreach ($file in ($dstFiles | Sort-Object Name)) {
    $i++
    $taggedName = "$($file.BaseName).temp$($file.Extension)"
    $file | Rename-Item -NewName $taggedName
    Write-Progress "Step 8a. Rename files" "$i / $($dstFiles.Count)" -PercentComplete ($i / $dstFiles.Count * 100)
}

# Pass (b): number the files in name order, zero-padded to the number of
# digits of the file count.
$dstFiles = Get-ChildItem $DstDir -File
$padWidth = $dstFiles.Count.ToString().Length
$i = 0
foreach ($file in ($dstFiles | Sort-Object Name)) {
    $i++
    $number = $i.ToString().PadLeft($padWidth, '0')
    $file | Rename-Item -NewName "$($number)$($file.Extension)"
    Write-Progress "Step 8b. Rename files" "$i / $($dstFiles.Count)" -PercentComplete ($i / $dstFiles.Count * 100)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment