Created
November 6, 2025 11:39
-
-
Save jTakasuRyuji/74ebaab366cd207d331154b0892c6b3b to your computer and use it in GitHub Desktop.
URLリストをソート、重複除去、FQDNでグループ化するスクリプト
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sorts a URL list, removes duplicate URLs, and groups the URLs by host name.
#
# Input : .\URLLIST.txt          - read line by line; only lines that match
#                                  "^https?://" are processed.
# Output: .\URLLIST_DISTINCT.txt - for each host: a "# <host>" header line, the
#                                  host's distinct URLs in sorted order, and a
#                                  blank separator line.
# Exit  : 1 when the input file does not exist.
$inputFile = ".\URLLIST.txt"
$outputFile = ".\URLLIST_DISTINCT.txt"

# Abort early when the input file is missing.
if (-not (Test-Path $inputFile)) {
    Write-Error "入力ファイル $inputFile が見つかりません。"
    exit 1
}

# Hashtable: host name (lower-cased, leading "www." removed) -> HashSet of the
# distinct URLs seen for that host.
$urlGroups = @{}

# @(...) keeps $urls an array even when zero or one line matches, so that
# .Count below is always well defined.
$urls = @(Get-Content $inputFile | Where-Object { $_ -match "^https?://" })

foreach ($url in $urls) {
    try {
        $uri = [System.Uri]$url
        # Invariant lower-casing so the grouping key does not depend on the
        # current culture.
        $domain = $uri.Host.ToLowerInvariant()
        # Drop a leading "www." so www.example.com and example.com share a group.
        $fqdn = $domain -replace '^www\.', ''
        if (-not $urlGroups.ContainsKey($fqdn)) {
            $urlGroups[$fqdn] = [System.Collections.Generic.HashSet[string]]::new()
        }
        # HashSet.Add returns a bool; discard it. Duplicates are ignored by the set.
        [void]$urlGroups[$fqdn].Add($url)
    }
    catch {
        # Lines that cannot be converted to a System.Uri are reported and skipped.
        Write-Warning "無効なURL: $url"
        continue
    }
}

# Build the output lines.
$output = New-Object System.Collections.ArrayList
$distinctCount = 0

# Sort the host names. psbase.Keys is used instead of .Keys because hashtable
# dot-notation would return the entry's value if a host were literally named
# "keys".
$sortedDomains = @($urlGroups.psbase.Keys | Sort-Object)
foreach ($domain in $sortedDomains) {
    [void]$output.Add("# $domain")
    # Sort the URLs within this host.
    $sortedUrls = $urlGroups[$domain] | Sort-Object
    foreach ($url in $sortedUrls) {
        [void]$output.Add($url)
    }
    # Count distinct URLs here rather than via Measure-Object, whose Sum is
    # $null when there is no input.
    $distinctCount += $urlGroups[$domain].Count
    [void]$output.Add("")
}

# Save to file.
$output | Set-Content -Path $outputFile -Encoding UTF8

Write-Host "処理が完了しました。"
Write-Host "出力ファイル: $outputFile"
Write-Host "グループ化されたドメイン数: $($sortedDomains.Count)"
Write-Host "合計URL数: $($urls.Count)"
Write-Host "重複除去後のURL数: $distinctCount"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment