<#
.SYNOPSIS
这是一个从慕课网批量下载视频的 PowerShell 脚本。
.DESCRIPTION
慕课网(http://www.imooc.com)是一个大型开放式网络课程教学网站。本脚本用于抓取慕课网的视频资源,并对视频资源进行二次加工整理,使您能更方便地离线观看视频教程。
.PARAMETER Uri
教程专辑的 URL,例如 'http://www.imooc.com/learn/197'。
.PARAMETER ID
教程专辑的 ID,支持多个,例如 75, 197, 156, 203。
.PARAMETER Combine
自动合并 *.flv 视频。
.PARAMETER RemoveOriginal
删除分段视频。只有在指定 -Combine 开关的情况下才生效。
.EXAMPLE
PS> .\Download-Imooc.ps1 http://www.imooc.com/learn/75
根据教程专辑的 URL 来下载。
.EXAMPLE
PS> .\Download-Imooc.ps1 75
根据教程专辑的 ID 来下载。
.EXAMPLE
PS> .\Download-Imooc.ps1 75, 197, 156, 203
根据教程专辑的 ID 来下载。支持输入多个 ID。
.EXAMPLE
PS> .\Download-Imooc.ps1 http://www.imooc.com/learn/75 -Combine -RemoveOriginal
根据教程专辑的 URL 来下载。完成之后合并所有的视频,并且删除原始的分段视频。
.EXAMPLE
PS> .\Download-Imooc.ps1 75, 197, 156, 203 -Combine -RemoveOriginal
根据教程专辑的 ID 来下载。支持输入多个 ID。完成之后合并所有的视频,并且删除原始的分段视频。
.EXAMPLE
PS> .\Download-Imooc.ps1
不带任何参数运行该脚本,将自动检测当前目录下的所有下载文件夹,同时检测对应的专辑网站。对于本地缺失的视频或网站更新的视频,将自动续传。
.NOTES
只支持 PowerShell 3.0 及更高版本。
默认下载的是最高清晰度的视频。
若在使用中遇到问题,请联系 victorwoo@gmail.com
.INPUTS
None
.OUTPUTS
None
.LINK
https://github.com/victorwoo/Download-Imooc
.LINK
http://blog.vichamp.com/powershell/2014/09/26/download-videos-from-imooc-com-by-powershell/
#>
[CmdletBinding(DefaultParameterSetName = 'URI',
SupportsShouldProcess = $true,
ConfirmImpact = 'Medium')]
# SupportsShouldProcess is what lets the functions below call
# $PSCmdlet.ShouldProcess(...) before downloading, merging or deleting.
Param
(
# Course album URL (parameter set 'URI', which is the default set). It is
# optional: when it is empty the script body uses the course URLs returned by
# Get-ExistingCourses instead.
[Parameter(ParameterSetName = 'URI',
Position = 0,
Mandatory = $false,
ValueFromPipeline = $true,
HelpMessage = '请输入专辑 URL')]
[string]
$Uri,
# One or more numeric course IDs (parameter set 'ID'); the script body turns
# each one into a 'http://www.imooc.com/view/{0}' URL.
[Parameter(ParameterSetName = 'ID',
Position = 0,
Mandatory = $true,
ValueFromPipeline = $true,
HelpMessage = '请输入专辑 ID,多个 ID 请用逗号隔开')]
[int[]]
$ID,
# When set, Combine-Videos merges the downloaded segments into one file.
[Switch]
$Combine,
# When set (and the merge succeeded), Combine-Videos deletes the segment folder.
[Switch]
$RemoveOriginal
)
# $DebugPreference = 'Continue' # Continue, SilentlyContinue
# $ProgressPreference='SilentlyContinue'
# $WhatIfPreference = $true # $true, $false
# Returns $FileName with every character listed by
# [System.IO.Path]::GetInvalidFileNameChars(), and every '+', replaced by '.'.
function Get-NormalizedFileName
{
    Param (
        $FileName
    )

    $sanitized = $FileName
    foreach ($badChar in [System.IO.Path]::GetInvalidFileNameChars())
    {
        $sanitized = $sanitized.Replace($badChar, '.')
    }

    # '+' is legal on the file system but is replaced as well.
    return $sanitized.Replace('+', '.')
}
# Returns $FolderName made safe for use as a single directory name: every
# invalid path character, every invalid file-name character and every '+' is
# replaced by '.'.
#
# GetInvalidPathChars() alone is not enough here: it does not report
# characters such as ':', '*', '?', '/' or '\', which are legal inside a path
# but not inside one directory name. The result is used as a single path
# component (e.g. "$folderName\$folderName$extension"), so the file-name set
# is applied too.
function Get-NormalizedFolderName
{
    Param (
        $FolderName
    )

    $invalidChars = @([System.IO.Path]::GetInvalidPathChars()) +
                    @([System.IO.Path]::GetInvalidFileNameChars())

    $sanitized = $FolderName
    foreach ($badChar in $invalidChars)
    {
        $sanitized = $sanitized.Replace($badChar, '.')
    }

    return $sanitized.Replace('+', '.')
}
# Scrapes a course album and writes to the pipeline, in this order:
#   1. the page title,
#   2. the short description ('' when it cannot be found),
#   3. one object per video lesson with the properties ID, Title and During.
# Links whose text does not end in a "(mm:ss)" duration are skipped.
function Get-CourseInfo
{
    Param (
        $Uri
    )

    # Title and description are read from the '/view/' form of the URL.
    $Uri = $Uri.Replace('/learn/', '/view/')
    $Uri = $Uri.Replace('/qa/', '/view/')
    $Uri = $Uri.Replace('/note/', '/view/')
    $Uri = $Uri.Replace('/wiki/', '/view/')
    $response = Invoke-WebRequest $Uri
    $title = $response.ParsedHtml.title

    # The capture must be a *named* group because it is read back through
    # $Matches['course_shortdecription'].
    # NOTE(review): only line breaks surround the capture in this pattern; the
    # literal markup that should anchor it looks like it was lost. Confirm the
    # delimiters against the real page before relying on the description.
    if ($response.RawContent -cmatch '
\s*(?<course_shortdecription>.*?)\s*
') {
        $description = $Matches['course_shortdecription']
    } else {
        $description = ''
    }

    # The lesson links are read from the '/learn/' form of the URL.
    $Uri = $Uri.Replace('/view/', '/learn/')
    $response = Invoke-WebRequest $Uri

    echo $title
    echo $description

    foreach ($link in $response.Links)
    {
        if ($link.href -cnotmatch '(?m)^/video/(\d+)$')
        {
            continue
        }
        $videoId = $Matches[1]
        $videoTitle = $link.InnerText

        # Lesson links look like "1-1 Title (03:25)"; anything else is skipped.
        if ($videoTitle -cnotmatch '(?m)^\d.*? \((?<DURING>\d{2}:\d{2})\)\s*$')
        {
            continue
        }

        # Built from minutes/seconds so that values such as "75:30" do not
        # overflow the minutes field of a "hh:mm:ss" parse.
        $minutes, $seconds = $Matches['DURING'] -split ':'
        $during = New-TimeSpan -Minutes $minutes -Seconds $seconds

        [PSCustomObject][Ordered]@{
            ID = $videoId;
            Title = $videoTitle;
            During = $during;
        }
    }
}
# Asks the ajaxmediainfo endpoint for the media URL of video $ID and returns
# it rewritten to the high-quality ('H') variant.
# Returns nothing when the response carries no media path; a warning with the
# service's result code is written whenever that code is not 0.
function Get-VideoUri
{
    Param (
        [Parameter(ValueFromPipeline = $true)]
        $ID
    )

    $requestUri = 'http://www.imooc.com/course/ajaxmediainfo/?mid={0}&mode=flash' -f $ID
    Write-Debug $requestUri
    $result = Invoke-RestMethod $requestUri
    if ($result.result -ne 0)
    {
        Write-Warning $result.result
    }

    # The request URL and the media URL live in separate variables: with a
    # single variable, a response without 'mpath' left the request URL in place
    # and it was returned as if it were the video address.
    $mediaUri = @($result.data.result.mpath)[0]
    if (-not $mediaUri)
    {
        return
    }

    # Pick the highest-quality version.
    $mediaUri = $mediaUri.Replace('L.flv', 'H.flv').Replace('M.flv', 'H.flv')
    $mediaUri = $mediaUri.Replace('L.mp4', 'H.mp4').Replace('M.mp4', 'H.mp4')
    return $mediaUri
}
# Loads the page of video $ID and emits one object (Title, Href) for every
# link on it whose class is 'downcode'.
function Get-SourceInfo
{
    Param (
        [Parameter(ValueFromPipeline = $true)]
        $ID
    )

    $pageUri = 'http://www.imooc.com/video/{0}' -f $ID
    Write-Debug $pageUri
    $page = Invoke-WebRequest $pageUri

    foreach ($link in $page.Links)
    {
        if ($link.class -eq 'downcode')
        {
            [PSCustomObject][Ordered]@{
                Title = $link.title;
                Href = $link.href;
            }
        }
    }
}
# Writes an internet shortcut "<Title>\<Title>.url" under the current
# location, pointing at $Uri, through the WScript.Shell COM object.
function New-ShortCut
{
    Param (
        $Title,
        $Uri
    )

    $wshShell = New-Object -ComObject 'wscript.shell'
    $shortcutPath = Join-Path (pwd) "$Title\$Title.url"

    $shortcut = $wshShell.CreateShortcut($shortcutPath)
    $shortcut.TargetPath = $Uri
    $shortcut.Save()
}
# Stops the script with an error when the running PowerShell major version is
# lower than 3; does nothing otherwise.
function Assert-PSVersion
{
    $majorVersion = $PSVersionTable.PSVersion.Major
    if ($majorVersion -ge 3)
    {
        return
    }

    Write-Error '请安装 PowerShell 3.0 以上的版本。'
    exit
}
# Scans the sub-directories of the current location. For each one that
# contains "<name>\<name>.url", the shortcut target is read and emitted when
# it is an imooc course URL.
function Get-ExistingCourses
{
    foreach ($courseDir in Get-ChildItem -Directory)
    {
        $urlFile = (Join-Path $courseDir $courseDir.Name) + '.url'
        if (-not (Test-Path -PathType Leaf $urlFile))
        {
            continue
        }

        $wshShell = New-Object -ComObject 'wscript.shell'
        $target = $wshShell.CreateShortcut($urlFile).TargetPath
        if ($target -cmatch '(?m)\A^http://www\.imooc\.com/\w+/\d+$\z')
        {
            $target
        }
    }
}
# Writes "<folderName>\info.txt" (UTF-8): the course title, its URL, the
# description, and a table with Start / End / During / Title per video, where
# Start and End are running offsets accumulated from each video's During.
function Out-IndexFile
{
    Param ($title, $description, $uri, $videos, $folderName)

    $filePath = Join-Path $folderName 'info.txt'
    $title | Set-Content $filePath -Encoding UTF8
    $uri | Add-Content $filePath -Encoding UTF8
    "`n$description`n" | Add-Content $filePath -Encoding UTF8

    # The running offset is a local accumulator; keeping it in $global:offset
    # would overwrite or leak a variable in the caller's session.
    $offset = [System.TimeSpan]::Zero
    $rows = foreach ($video in $videos)
    {
        $start = $offset
        $offset += $video.During
        [PSCustomObject][Ordered]@{
            Start = $start;
            End = $offset;
            During = $video.During;
            Title = $video.Title;
        }
    }

    $rows |
        Format-Table -AutoSize |
        Out-String |
        Add-Content $filePath -Encoding UTF8
}
# Concatenates the MP4 files in $sources into $dest with .\util\MP4Box.exe
# (mp4box -cat file1 -cat file2 -new dest).
# Returns a single boolean: $true only when the tool exists, exits with code
# 0 and $dest is present afterwards.
function Combine-MP4
{
    Param ($sources, $dest)

    $tool = '.\util\MP4Box.exe'
    if (-not (Test-Path -PathType Leaf $tool))
    {
        return $false
    }

    $params = @(
        foreach ($source in $sources)
        {
            '-cat'
            $source
        }
        '-new'
        $dest
    )

    $succeeded = $false
    $eap = $ErrorActionPreference
    $ErrorActionPreference = "SilentlyContinue"
    try
    {
        # The tool's stdout is discarded so that it does not become part of
        # this function's return value.
        & $tool $params | Out-Null
        # The outcome is captured right after the call. Reading $? after the
        # preference has been restored only reports on that assignment, which
        # always succeeds.
        $succeeded = ($LASTEXITCODE -eq 0) -and (Test-Path -LiteralPath $dest -PathType Leaf)
    }
    catch
    {
        $succeeded = $false
    }
    finally
    {
        $ErrorActionPreference = $eap
    }
    return $succeeded
}
# Concatenates the FLV files in $sources into $dest with .\util\FlvBind.exe,
# which is called with the destination first and the segments after it.
# Returns a single boolean: $true when the tool exists and $dest is present
# after it ran. $sources is not modified.
# NOTE(review): FlvBind.exe's exit-code convention is not visible here, so
# success is judged by the output file existing. A stale $dest must therefore
# be removed beforehand (Combine-Videos does that).
function Combine-Flv
{
    Param ($sources, $dest)

    $tool = '.\util\FlvBind.exe'
    if (-not (Test-Path -PathType Leaf $tool))
    {
        return $false
    }

    # A fresh array is built here; inserting $dest into $sources would change
    # the caller's list.
    $params = @($dest) + @($sources)

    $succeeded = $false
    $eap = $ErrorActionPreference
    $ErrorActionPreference = "SilentlyContinue"
    try
    {
        # The tool's stdout is discarded so that it does not become part of
        # this function's return value.
        & $tool $params | Out-Null
        # Checked here rather than via $? after the preference is restored,
        # which would only report on that (always successful) assignment.
        $succeeded = Test-Path -LiteralPath $dest -PathType Leaf
    }
    catch
    {
        $succeeded = $false
    }
    finally
    {
        $ErrorActionPreference = $eap
    }
    return $succeeded
}
# Merges the downloaded segments of one course into
# "<folderName>\<folderName><extension>" when the script-level -Combine switch
# is set, using Combine-Flv for '.flv' and Combine-MP4 for '.mp4'.
# After a successful merge, and when -RemoveOriginal is set, the segment
# folder "<folderName>\分段视频" is deleted.
# Reads $Combine, $RemoveOriginal and $PSCmdlet from the calling script.
# $actualDownloadAny is accepted for interface compatibility only: the merge
# runs whenever ShouldProcess confirms it.
function Combine-Videos
{
    Param ($folderName, $actualDownloadAny, $outputPathes)

    if (-not $Combine)
    {
        return
    }
    if ($outputPathes.Count -eq 0)
    {
        return
    }

    $extension = [System.IO.Path]::GetExtension($outputPathes[0])
    $targetFile = "$folderName\$folderName$extension"

    # The former condition "A -or -not T -or T -and ShouldProcess" reduces to
    # the ShouldProcess call alone: -and and -or share one precedence level and
    # evaluate left to right, and "-not T -or T" is always true.
    if (-not $PSCmdlet.ShouldProcess('分段视频', '合并'))
    {
        return
    }

    $message = "合并视频(共 {0:N0} 个)" -f $outputPathes.Count
    # No trailing backtick after the last argument: a line continuation here
    # would make the Write-Output line below part of this command.
    Write-Progress `
        -Activity '下载' `
        -Status '合并视频' `
        -CurrentOperation $message `
        -Id 2
    Write-Output $message

    if (Test-Path $targetFile)
    {
        Remove-Item $targetFile
    }

    # Stays $false for any extension other than .flv / .mp4.
    $result = $false
    if ($extension.ToLower() -eq '.flv')
    {
        $result = Combine-Flv $outputPathes $targetFile
    }
    elseif ($extension.ToLower() -eq '.mp4')
    {
        $result = Combine-MP4 $outputPathes $targetFile
    }

    if (-not $result)
    {
        Write-Warning '视频合并失败'
        return
    }

    Write-Output '视频合并成功'
    if ($RemoveOriginal -and $PSCmdlet.ShouldProcess('分段视频', '删除'))
    {
        Remove-Item "$folderName\分段视频" -Recurse
        Write-Output '原始视频删除完毕'
    }
}
# Downloads one source-code attachment to
# "<FolderName>\源代码\<Title>.<extension>", where the extension is the text
# after the last '.' of $Href. Does nothing when $Title or $Href is empty or
# when the target file already exists; the download itself is subject to
# $PSCmdlet.ShouldProcess.
function Download-Source
{
    Param (
        $FolderName,
        $Title,
        $Href
    )

    if (-not ($Title -and $Href))
    {
        return
    }

    $suffix = ($Href -split '\.')[-1]

    $sourceDir = "$folderName\源代码"
    if (-not (Test-Path $sourceDir))
    {
        $null = mkdir $sourceDir
    }

    $destination = "$sourceDir\$Title.$suffix"
    if (Test-Path $destination)
    {
        return
    }

    if ($PSCmdlet.ShouldProcess("$Href", 'Invoke-WebRequest'))
    {
        Invoke-WebRequest -Uri $Href -OutFile $destination
    }
}
# Downloads one course: reads the course info, creates the course folder and
# its .url shortcut, downloads the source-code attachments and the video of
# every lesson (existing files are skipped), writes info.txt and finally hands
# over to Combine-Videos.
# Reads $cources and $courcesIndex from the calling script for the overall
# progress bar, and $PSCmdlet for ShouldProcess.
function Download-Course
{
    Param (
        [string]$Uri
    )

    # Overall progress; guarded so that an empty or undefined $cources does
    # not cause a division by zero.
    $coursePercent = 0
    if ($cources.Length)
    {
        $coursePercent = $courcesIndex / $cources.Length * 100
    }

    Write-Progress `
        -Activity '下载课程' `
        -Status '分析视频 ID' `
        -CurrentOperation $Uri `
        -PercentComplete $coursePercent `
        -Id 1

    $courseTitle, $description, [array]$videos = Get-CourseInfo -Uri $Uri
    if (-not $courseTitle)
    {
        Write-Warning "无法获取课程信息: $Uri"
        return
    }
    Write-Output "《$courseTitle》"

    $folderName = Get-NormalizedFolderName $courseTitle
    if (-not (Test-Path $folderName)) { $null = mkdir $folderName }

    # The shortcut lives at "<folder>\<folder>.url" (that is also where
    # Get-ExistingCourses looks for it), so the normalized folder name is
    # passed here rather than the raw course title.
    New-ShortCut -Title $folderName -Uri $Uri

    $outputPathes = New-Object System.Collections.ArrayList
    $actualDownloadAny = $false

    Write-Progress `
        -Activity '下载课程' `
        -CurrentOperation $courseTitle `
        -PercentComplete $coursePercent `
        -Id 1

    $videosIndex = 0
    foreach ($video in $videos)
    {
        # Only entries whose title starts with a digit are lessons.
        if ($video.Title -cnotmatch '(?m)^\d')
        {
            $videosIndex++
            continue
        }

        $title = $video.Title
        Write-Progress `
            -Activity '下载视频' `
            -CurrentOperation $title `
            -PercentComplete ($videosIndex / $videos.Count * 100) `
            -Id 2 `
            -ParentId 1

        # A foreach statement skips an empty result; piping $null into
        # ForEach-Object would run the body once and divide by zero below.
        [array]$sources = Get-SourceInfo $video.ID
        $sourceIndex = 0
        foreach ($source in $sources)
        {
            Write-Progress `
                -Activity '下载源代码' `
                -CurrentOperation $source.Href `
                -PercentComplete ($sourceIndex / $sources.Length * 100) `
                -Id 3 `
                -ParentId 2
            Download-Source $folderName $source.Title $source.Href
            $sourceIndex++
        }
        echo 源代码下载完成
        Write-Progress `
            -Activity '下载源代码' `
            -Completed `
            -Id 3 `
            -ParentId 2

        $videoUrl = Get-VideoUri $video.ID
        if (-not $videoUrl)
        {
            Write-Warning "无法获取视频地址: $title"
            $videosIndex++
            continue
        }

        $extension = ($videoUrl -split '\.')[-1]
        $title = Get-NormalizedFileName $title
        if (!(Test-Path "$folderName\分段视频")) {
            $null = mkdir "$folderName\分段视频"
        }
        $outputPath = "$folderName\分段视频\$title.$extension"
        $null = $outputPathes.Add($outputPath)

        Write-Output $title
        Write-Debug $videoUrl
        Write-Debug $outputPath

        if (Test-Path $outputPath)
        {
            Write-Debug "目标文件 $outputPath 已存在,自动跳过"
        }
        else
        {
            Write-Progress `
                -Activity '下载视频' `
                -CurrentOperation "$title" `
                -PercentComplete ($videosIndex / $videos.Count * 100) `
                -Id 2 `
                -ParentId 1
            if ($PSCmdlet.ShouldProcess("$videoUrl", 'Invoke-WebRequest'))
            {
                Invoke-WebRequest -Uri $videoUrl -OutFile $outputPath
                $actualDownloadAny = $true
            }
        }
        $videosIndex++
    }

    Out-IndexFile $courseTitle $description $Uri $videos $folderName

    Write-Progress `
        -Activity '下载视频' `
        -Status '合并视频' `
        -CurrentOperation $title `
        -PercentComplete 100 `
        -Id 2 `
        -ParentId 1
    Combine-Videos $folderName $actualDownloadAny $outputPathes
    Write-Progress `
        -Activity '下载视频' `
        -CurrentOperation $title `
        -Completed `
        -Id 2 `
        -ParentId 1
}
Assert-PSVersion

# Build the list of course URLs from the chosen parameter set.
# $cources and $courcesIndex keep their names because Download-Course reads
# them from this scope for its progress bar.
[array]$cources = @()
$chosen = $PSCmdlet.ParameterSetName
if ($chosen -eq 'URI')
{
    if ($Uri)
    {
        # The course is only queued here; it is downloaded once, by the loop
        # below.
        [array]$cources = @($Uri)
    }
    else
    {
        # No URL given: resume every course found under the current location.
        # @() keeps the list empty (rather than $null) when none is found.
        [array]$cources = @(Get-ExistingCourses)
    }
}
if ($chosen -eq 'ID')
{
    [array]$cources = @($ID | ForEach-Object { 'http://www.imooc.com/view/{0}' -f $_ })
}

$courcesIndex = 0
foreach ($course in $cources)
{
    Download-Course $course
    $courcesIndex++
}

Write-Progress `
    -Activity '下载课程' `
    -Completed `
    -Id 1
echo '全部课程下载完毕'