Last active
January 18, 2026 04:50
-
-
Save baaamn/55854a54e2d32555f6e27445f089d086 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<#
.SYNOPSIS
    Scans files or clipboard for URLs with customizable output fields and export capabilities.

.DESCRIPTION
    This function parses text for HTTP/HTTPS links. It allows granular control over
    which metadata properties (Path, Line, Value, etc.) are included in the final output.

    It supports:
    1. Detailed Extraction: Returns specific properties for every match.
    2. Summary Mode: Returns a count per distinct URL.
    3. Exporting: Native support for saving to CSV or JSON.

    In file mode the search starts in the current directory and recurses into
    subdirectories.

.PARAMETER Extension
    An array of file name patterns to search recursively. Defaults to "*.txt".
    Example: -Extension "*.txt", "*.log", "*.md"

.PARAMETER FromClipboard
    Switch. Processes the system clipboard content instead of files.

.PARAMETER SelectProperties
    Specifies which columns to include in the output.
    Options: 'Path', 'Filename', 'LineNumber', 'Line', 'Match', 'Matches', 'Value'.
    'Match' and 'Matches' both return the raw regex match object; 'Matches' is
    kept as an alias so existing callers continue to work.
    Default: '*' (All properties).
    Note: This parameter is ignored if -Summarize is used.

.PARAMETER Summarize
    Switch. Changes output from a detailed list to a frequency count (URL + Count).

.PARAMETER ExportPath
    Optional path to save results. If omitted, results are written to the pipeline.

.PARAMETER ExportFormat
    'CSV' or 'JSON'. Default is CSV.

.EXAMPLE
    Get-UrlMatches -SelectProperties Value, LineNumber

    Returns a clean list containing ONLY the URL and the line number it was found on.

.EXAMPLE
    Get-UrlMatches -Extension "*.log" -ExportPath "report.csv" -SelectProperties Path, Value

    Scans logs and creates a CSV containing only the file path and the URL found.

.EXAMPLE
    Get-UrlMatches -SelectProperties Path, Value, Line -ExportPath "C:\Audit\links.csv"

    The "Full Audit" to CSV.
    Exports specific fields to a spreadsheet, leaving out the raw regex match object
    (which is messy in CSVs) but keeping the surrounding line for context.

.EXAMPLE
    Get-UrlMatches -SelectProperties Value

    The "Clean List" (just the URLs).
    Use this when you don't care about file paths or line numbers and just want
    the links that were found. Only the 'Value' column is returned.

.EXAMPLE
    Get-UrlMatches -SelectProperties Filename, LineNumber

    The "Location Audit".
    Use this when you are debugging and need to find *where* a bad link is hidden.
    Returns the file name and the exact line number of every match.
#>
function Get-UrlMatches {
    [CmdletBinding(DefaultParameterSetName = 'FromFiles')]
    param(
        # -- SOURCE INPUT --
        [Parameter(ParameterSetName = 'FromFiles', Position = 0)]
        [string[]]$Extension = "*.txt",

        [Parameter(ParameterSetName = 'FromClipboard')]
        [switch]$FromClipboard,

        # -- OUTPUT CONTROL --
        # The user can choose one, many, or all (*) of these specific properties.
        # 'Matches' is accepted for backward compatibility and resolves to the
        # 'Match' property of the result objects.
        [Parameter()]
        [ValidateSet('Path', 'Filename', 'LineNumber', 'Line', 'Match', 'Matches', 'Value', '*')]
        [string[]]$SelectProperties = '*',

        [Parameter()]
        [switch]$Summarize,

        # -- EXPORT CONFIG --
        [Parameter()]
        [string]$ExportPath,

        [Parameter()]
        [ValidateSet('CSV', 'JSON')]
        [string]$ExportFormat = 'CSV'
    )

    begin {
        Write-Verbose "Initializing URL Extractor..."

        # Accumulates one object per URL match; shaped and emitted in the end block.
        $ResultsList = [System.Collections.Generic.List[PSCustomObject]]::new()
    }

    process {
        # 1. DETERMINE DATA SOURCE
        if ($FromClipboard) {
            Write-Verbose "Reading Source: Clipboard"
            $InputData = Get-Clipboard
        }
        else {
            Write-Verbose "Reading Source: Files ($($Extension -join ', '))"
            # -ErrorAction SilentlyContinue keeps enumeration going past items
            # that cannot be read (e.g. access-denied directories).
            $InputData = Get-ChildItem -Path . -Include $Extension -Recurse -File -ErrorAction SilentlyContinue
        }

        # 2. DEFINE REGEX
        # scheme://[www.]host.tld followed by an optional path/query/fragment.
        # The optional tail must END with a character from a class that excludes
        # '.', ',', '(', ')', '?' and '&', so trailing sentence punctuation is
        # not captured while those characters stay legal inside the URL.
        $Pattern = 'https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*[-a-zA-Z0-9@:%_\+~#//=])?'

        # 3. RUN EXTRACTION LOOP
        if ($InputData) {
            # Assign to $null so nothing leaks to the pipeline during collection.
            $null = $InputData |
                Select-String -Pattern $Pattern -AllMatches |
                ForEach-Object {
                    $lineInfo = $_

                    # One result object per regex match found on the current line.
                    foreach ($match in $lineInfo.Matches) {
                        # Gather every field up front; column selection happens later.
                        $obj = [PSCustomObject]@{
                            Path       = if ($FromClipboard) { "Clipboard" } else { $lineInfo.Path }
                            Filename   = if ($FromClipboard) { "Clipboard" } else { $lineInfo.Filename }
                            LineNumber = $lineInfo.LineNumber
                            Line       = $lineInfo.Line.Trim()
                            Value      = $match.Value
                            Match      = $match   # The raw regex match object
                        }
                        $ResultsList.Add($obj)
                    }
                }
        }
        else {
            Write-Warning "No content found to process."
        }
    }

    end {
        Write-Verbose "Processing complete. Total raw matches: $($ResultsList.Count)"

        # 4. FILTER / SHAPE THE DATA
        if ($Summarize) {
            # Mode A: Summary (ignores SelectProperties because structure is fixed)
            Write-Verbose "Mode: Summary (Counts)"
            $FinalOutput = $ResultsList |
                Group-Object -Property Value -NoElement |
                Sort-Object Count -Descending |
                Select-Object @{N='URL';E={$_.Name}}, Count
        }
        else {
            # Mode B: Detailed (Filters columns based on user request)
            Write-Verbose "Mode: Detailed (Properties: $($SelectProperties -join ', '))"
            if ($SelectProperties -contains '*') {
                # If wildcard is used, pass the full object
                $FinalOutput = $ResultsList
            }
            else {
                # The result objects expose the raw match as 'Match'. Requesting
                # 'Matches' used to select a non-existent property and produce an
                # empty column, so map it to a calculated property instead.
                $columns = @(
                    foreach ($name in $SelectProperties) {
                        if ($name -eq 'Matches') {
                            @{ Name = 'Matches'; Expression = { $_.Match } }
                        }
                        else {
                            $name
                        }
                    }
                )
                $FinalOutput = $ResultsList | Select-Object -Property $columns
            }
        }

        # 5. EXPORT OR RETURN
        if ($ExportPath) {
            Write-Verbose "Exporting to $ExportFormat ($ExportPath)..."
            try {
                # -ErrorAction Stop promotes non-terminating errors to terminating
                # ones so the catch block runs and "Success" is not printed after
                # a failed write.
                if ($ExportFormat -eq 'CSV') {
                    # Note: If 'Match' (raw object) is selected, it may not render well in CSV
                    $FinalOutput | Export-Csv -Path $ExportPath -NoTypeInformation -Encoding UTF8 -ErrorAction Stop
                }
                elseif ($ExportFormat -eq 'JSON') {
                    $FinalOutput |
                        ConvertTo-Json -Depth 2 |
                        Set-Content -Path $ExportPath -Encoding UTF8 -ErrorAction Stop
                }
                Write-Host "Success: Saved to $ExportPath" -ForegroundColor Green
            }
            catch {
                Write-Error "Export Failed: $_"
            }
        }
        else {
            # Return objects to pipeline
            return $FinalOutput
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment