Skip to content

Instantly share code, notes, and snippets.

@CCRcmcpe
Last active May 3, 2022 01:31
Show Gist options
  • Select an option

  • Save CCRcmcpe/f88a5b93dcb918a13f4b90d3b4cd6c02 to your computer and use it in GitHub Desktop.

Select an option

Save CCRcmcpe/f88a5b93dcb918a13f4b90d3b4cd6c02 to your computer and use it in GitHub Desktop.
哔哩哔哩 爬取UP主专栏图片
# Script parameters.
#   userId  - numeric id sent as 'mid' to the article-list API; must be 1..999999999 when supplied.
#   saveDir - root directory; one sub-folder per article is created beneath it. Must be non-empty when supplied.
# Neither parameter is declared Mandatory, so the validation attributes are only
# checked against values the caller actually passes.
param (
    [ValidateRange(1, 999999999)] [int] $userId,
    [ValidateNotNullOrEmpty()] [string] $saveDir
)
# Optional session cookie; only used by the commented-out -Headers line below.
#$sessdata = ''
$PSStyle.Progress.UseOSCIndicator = $true
$PSStyle.Progress.View = 'Classic'
# Turn every error into a terminating one so a failed request aborts the run.
$ErrorActionPreference = 'Stop'

# -LiteralPath: the directory name must not be interpreted as a wildcard pattern ('[', ']', '*', '?').
if (!(Test-Path -LiteralPath $saveDir)) {
    $null = mkdir $saveDir
}
# The download directory is later handed to an external aria2 process, which resolves
# relative paths against its own working directory, so pin it to an absolute path here.
$saveDir = (Resolve-Path -LiteralPath $saveDir).ProviderPath

# Page through the article list until every page has been fetched.
$page = 0
$totalPagesCount = 1
$articles = @()
do {
    $page++
    $response = Invoke-RestMethod -Method Get `
        -Uri 'https://api.bilibili.com/x/space/article' `
        -Body @{ mid = $userId; pn = $page }
    #-Headers @{ SESSDATA = $sessdata }

    # Appending $null to an array would add an empty element, so skip pages without articles.
    if ($null -ne $response.data.articles) {
        $articles += $response.data.articles
    }

    # Presumably 'count' is the total number of articles and 'ps' the page size - confirm against the API.
    # Round UP so that a partially filled last page is still fetched, and never divide by zero.
    $pageSize = $response.data.ps
    if ($pageSize -gt 0) {
        $totalPagesCount = [int][math]::Ceiling([double]$response.data.count / $pageSize)
    }

    # -PercentComplete only accepts 0..100; an account without articles yields zero pages.
    $percent = 100
    if ($totalPagesCount -gt 0) {
        $percent = [math]::Min(100, $page / $totalPagesCount * 100)
    }
    Write-Progress '爬取专栏' -Id 0 -Status "进行中 [$page/$totalPagesCount] 页 / 专栏 [$($articles.Count)] 个 ->" -PercentComplete $percent
} while ($page -lt $totalPagesCount)
Write-Progress '爬取专栏' -Id 0 -Status '已完成'
# Matches one <figure> element of the article HTML and captures:
#   imageLink - the protocol-relative data-src URL of the image
#   imageDesc - the raw inner HTML of the <figcaption>
# No regex options are set, so '.' does not cross line breaks.
$imageRegex = [regex]::new('<figure.*?data-src="(?<imageLink>//i0.hdslb.com/bfs/article/.*?)".*?<figcaption.*?>(?<imageDesc>.*?)</figcaption>.*?</figure>')
# Makes a string usable as a single file or directory name.
# Every control character (0x00-0x1F, 0x7F) and each of  " * / : < > ? \ |
# is replaced by '%' followed by its code point as at least two upper-case hex digits;
# all other characters are passed through unchanged.
# Returns the rewritten string.
function NormalizePath([string] $path) {
    $forbidden = '[\x00-\x1F\x7F"\*\/:<>\?\\\|]'
    [regex]::Replace($path, $forbidden, {
            param($hit)
            # Each match is exactly one character; emit its percent-escaped code.
            '%{0:X2}' -f [int][char]$hit.Value
        })
}
# For every collected article: create a folder named after its title, download the
# article page, extract the images and queue each one in a local aria2 JSON-RPC server.
# Articles whose folder already exists are counted as ignored and skipped.
# Uses $articles, $saveDir, $imageRegex and NormalizePath from earlier in the script.
$articleCount = 0
$addedCount = 0
$ignoredCount = 0
foreach ($article in $articles) {
    $articleCount++
    Write-Progress '爬取图片' -Id 1 -Status "已处理 [$articleCount/$($articles.Count)] 个专栏 ->" -PercentComplete ($articleCount / $articles.Count * 100)
    Write-Progress '添加下载' -Id 2 -ParentId 1 -Status "添加 $addedCount 个 | 忽略 $ignoredCount 个"

    $articleSaveDir = Join-Path $saveDir (NormalizePath $article.title)
    # -LiteralPath: titles may contain '[' or ']', which NormalizePath keeps and which a plain
    # Test-Path would read as a wildcard, missing the existing folder and making mkdir fail.
    if (Test-Path -LiteralPath $articleSaveDir) {
        $ignoredCount++
        continue
    }
    $addedCount++
    $null = mkdir $articleSaveDir

    # Fetch the article page in a background job so a hung request can be abandoned after
    # 5 seconds. The request is retried until a job completes (there is no retry limit).
    $result = $null
    do {
        $job = Start-Job -ScriptBlock {
            param($articleId)
            $ProgressPreference = 'SilentlyContinue'
            Invoke-WebRequest "https://www.bilibili.com/read/cv$articleId/"
        } -ArgumentList $article.id
        try {
            $null = Wait-Job $job -Timeout 5
            $failed = $job.State -ne 'Completed'
            if (-not $failed) {
                $result = Receive-Job $job
            }
        }
        finally {
            # Always dispose of the job; -Force also stops one that is still running.
            Remove-Job $job -Force
        }
    } while ($failed)

    # File names already handed to aria2 for this article (compared case-insensitively).
    $usedNames = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase)
    $imageCount = 0
    foreach ($match in $imageRegex.Matches($result.Content)) {
        $imageCount++
        $imageLink = "https:$($match.Groups['imageLink'].Value)"

        # Use the caption up to the first <br tag as the name; fall back to the running index.
        $imageDesc = $match.Groups['imageDesc'].Value
        $index = $imageDesc.IndexOf('<br')
        if ($index -ne -1) {
            $imageDesc = $imageDesc.Remove($index)
        }
        if ($imageDesc -eq '') {
            $imageDesc = $imageCount.ToString()
        }

        # Two images with the same caption would otherwise target the same output file.
        $baseName = NormalizePath $imageDesc
        $fileName = "${baseName}.jpg"
        $suffix = $imageCount
        while (-not $usedNames.Add($fileName)) {
            $fileName = "${baseName} (${suffix}).jpg"
            $suffix++
        }

        # aria2.addUri takes [ [uris], {options} ]; the depth is stated explicitly so the
        # nested options table is never truncated by ConvertTo-Json.
        $rpcBody = ConvertTo-Json -Depth 4 -InputObject @{
            jsonrpc = '2.0'
            method  = 'aria2.addUri'
            id      = 'wdnmd'
            params  = @($imageLink), @{ dir = $articleSaveDir; out = $fileName }
        }

        # Hide Invoke-WebRequest's own progress bar, and restore the preference even if the call throws.
        $ProgressPreference = 'SilentlyContinue'
        try {
            $null = Invoke-WebRequest 'http://127.0.0.1:6800/jsonrpc' `
                -Method Post `
                -Headers @{ "Content-Type" = "application/json; charset=utf-8" } `
                -Body ([System.Text.Encoding]::UTF8.GetBytes($rpcBody))
        }
        finally {
            $ProgressPreference = 'Continue'
        }
    }
}
Write-Progress '爬取图片' -Id 1 -Status "已完成"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment