用 http.Get 只能获取到网页源码,而 chromedp 模拟 Chrome 浏览网页,可以获取到视频的 MP4 地址。
下面是不登录的情况:下载不需要 VIP 的视频,配合 goquery 使用。
package main
import (
"context"
"fmt"
"github.com/PuerkitoBio/goquery"
"github.com/chromedp/chromedp"
"io"
"log"
"net/http"
"os"
"strings"
"time"
)
// main downloads every episode linked from the series page whose URL is given
// as the first command-line argument.
//
// It collects the episode links with geturls, starts a headless Chrome through
// chromedp, and calls download for each episode from inside a
// chromedp.ActionFunc so that all page loads share one browser session.
func main() {
	if len(os.Args) < 2 {
		// Without this guard os.Args[1] panics with an index-out-of-range error.
		log.Fatalf("用法: %s <网址>", os.Args[0])
	}
	url := os.Args[1] // the first argument is the series page URL

	// Episode file name -> episode page URL. The original author scraped a
	// children's series; the selector in geturls may not fit other pages.
	m := geturls(url)

	options := []chromedp.ExecAllocatorOption{
		chromedp.Flag("headless", true), // set to false to watch the browser while debugging
		chromedp.Flag("blink-settings", "imagesEnabled=false"),
		chromedp.UserAgent(`Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36`),
	}
	options = append(chromedp.DefaultExecAllocatorOptions[:], options...)

	// Keep both cancel functions: they are called explicitly below because
	// log.Fatal exits the process without running deferred calls.
	allocCtx, allocCancel := chromedp.NewExecAllocator(context.Background(), options...)
	ctx, cancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))

	err := chromedp.Run(ctx,
		chromedp.ActionFunc(func(ctx context.Context) error {
			// Map iteration order is random, so episodes are not fetched in
			// episode order.
			for k, v := range m {
				download(v, k, ctx)
			}
			return nil
		}),
		chromedp.Sleep(5*time.Second),
	)

	cancel()
	allocCancel()
	if err != nil {
		log.Fatal(err)
	}
}
// download resolves the MP4 address of the episode page at url (via
// getmp4url) and saves the video to a file called name in the current
// directory.
//
// A failed request, a non-200 response or a failed write is retried after a
// 3 second pause; each retry resolves the MP4 address again. The function
// gives up when ctx is cancelled or when the output file cannot be created.
// Failures are reported on standard output; nothing is returned.
func download(url, name string, ctx context.Context) {
	for {
		// Stop retrying once the browser context is gone; otherwise every
		// further attempt would fail in the same way forever.
		if err := ctx.Err(); err != nil {
			fmt.Println("下载", name, "已取消:", err)
			return
		}

		mp4url := getmp4url(url, ctx)
		resp, err := http.Get(mp4url)
		if err != nil {
			// resp is nil here, so it must not be touched.
			fmt.Println("http.get地址", url, "失败,3秒后重试")
			time.Sleep(3 * time.Second)
			continue
		}
		if resp.StatusCode != http.StatusOK {
			// Do not store an error page under an .mp4 name.
			resp.Body.Close()
			fmt.Println("http.get地址", url, "失败,3秒后重试", resp.Status)
			time.Sleep(3 * time.Second)
			continue
		}

		fmt.Println("正在下载:", name)
		out, err := os.Create(name) // created in the current directory
		if err != nil {
			resp.Body.Close()
			fmt.Println("创建文件失败", err)
			return
		}

		// Stream the body to disk, then release both resources before any
		// retry so that they do not pile up across attempts.
		_, err = io.Copy(out, resp.Body)
		resp.Body.Close()
		if closeErr := out.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			fmt.Println("写入文件失败,3秒后重试", err)
			time.Sleep(3 * time.Second)
			os.Remove(name) // drop the partial file before retrying
			continue
		}

		fmt.Println(name, "下载完成")
		return
	}
}
// getmp4url opens url in the browser attached to ctx and returns the src
// attribute of the first <video> element on the page.
//
// It returns the empty string when the page cannot be loaded, when the
// captured HTML cannot be parsed, or when the <video> element has no src
// attribute.
func getmp4url(url string, ctx context.Context) string {
	var res string
	err := chromedp.Run(ctx,
		chromedp.Navigate(url),
		chromedp.Sleep(3*time.Second), // fixed pause before the <video> element is read
		chromedp.OuterHTML("video", &res, chromedp.ByQuery),
		chromedp.Sleep(1*time.Second),
	)
	if err != nil {
		fmt.Println("获取video失败1", err)
		return ""
	}

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(res))
	if err != nil {
		// doc is nil on error; using it would panic.
		fmt.Println("获取video失败2", err)
		return ""
	}

	// The "exists" flag is ignored on purpose: a missing src yields "".
	mp4url, _ := doc.Find("video").Attr("src")
	return mp4url
}
// geturls fetches the series page at url and returns a map from episode file
// name (the trimmed link text plus ".mp4") to the absolute episode page URL
// (the link's href prefixed with "http://www.fun.tv").
//
// The process is terminated with log.Fatal when the page cannot be fetched,
// is not answered with 200 OK, or cannot be parsed.
func geturls(url string) map[string]string {
	m := make(map[string]string)

	res, err := http.Get(url)
	if err != nil {
		// res is nil on error; reading res.Body would panic.
		log.Fatal(err)
	}
	if res.StatusCode != http.StatusOK {
		res.Body.Close()
		log.Fatalf("获取 %s 失败: %s", url, res.Status)
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	// Close explicitly rather than with defer: log.Fatal skips deferred calls.
	res.Body.Close()
	if err != nil {
		log.Fatal(err)
	}

	// Each matching anchor is one episode entry in the play list.
	doc.Find("#playCont > div > div > div > div.torrent-panel > ul > li > a").Each(func(i int, s *goquery.Selection) {
		src, ok := s.Attr("href")
		if !ok {
			// Without an href the entry would point at the bare site root.
			return
		}
		name := strings.TrimSpace(s.Text()) + ".mp4"
		m[name] = "http://www.fun.tv" + src
	})
	return m
}
函数 geturls 用 goquery 获取一部电视剧每一集的链接和对应的集数。chromedp.ActionFunc 用来执行自己定义的函数。我这里的思路是:把集数和链接存在 map 里,然后在 chromedp.ActionFunc 里一个一个取出来;download 会模拟跳转到对应的链接,获取 MP4 的地址并下载,接着再跳转到下一集。
如果是要下载 VIP 的电视剧呢?
那就模拟一下登录。
登录的代码如下:
package main
import (
"context"
"fmt"
"github.com/PuerkitoBio/goquery"
"github.com/chromedp/chromedp"
"io"
"log"
"net/http"
"os"
"strings"
"time"
)
// main logs in to www.fun.tv through a visible Chrome window and then
// downloads every episode linked from a hard-coded series page.
//
// It collects the episode links with geturls, drives the login form with
// chromedp, and calls download for each episode from inside a
// chromedp.ActionFunc so that the page loads reuse the logged-in session.
func main() {
	// Episode file name -> episode page URL.
	m := geturls("http://www.fun.tv/vplay/g-1005297/")

	options := []chromedp.ExecAllocatorOption{
		chromedp.Flag("headless", false), // run with a visible browser window
		chromedp.Flag("blink-settings", "imagesEnabled=false"),
		chromedp.UserAgent(`Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36`),
	}
	options = append(chromedp.DefaultExecAllocatorOptions[:], options...)

	// Keep both cancel functions: they are called explicitly below because
	// log.Fatal exits the process without running deferred calls.
	allocCtx, allocCancel := chromedp.NewExecAllocator(context.Background(), options...)
	ctx, cancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))

	err := chromedp.Run(ctx,
		chromedp.Navigate(`https://www.fun.tv/`),
		chromedp.Sleep(4*time.Second),
		chromedp.Click("login-btn", chromedp.ByID), // open the login dialog
		// Type the account name ("username" is presumably a placeholder to
		// be replaced with a real account).
		chromedp.SendKeys("panel_account", "username", chromedp.ByID),
		// Extra click: otherwise the account field pops up a list of
		// suggested accounts and the login cannot go through.
		chromedp.Click("#ui-content-login-panel > div > div > div:nth-child(1) > div.login-conn.dialog-conn > p", chromedp.ByQuery),
		// Type the password ("password" is presumably a placeholder too).
		chromedp.SendKeys("panel_password", "password", chromedp.ByID),
		chromedp.Sleep(2*time.Second),
		chromedp.Click("loginsubmit", chromedp.ByID), // submit the login form
		chromedp.Sleep(2*time.Second),
		chromedp.ActionFunc(func(ctx context.Context) error {
			// Map iteration order is random, so episodes are not fetched in
			// episode order.
			for k, v := range m {
				download(v, k, ctx)
			}
			return nil
		}),
		chromedp.Sleep(5*time.Second),
	)

	cancel()
	allocCancel()
	if err != nil {
		log.Fatal(err)
	}
}
// download resolves the MP4 address of the episode page at url (via
// getmp4url) and saves the video to a file called name in the current
// directory.
//
// A failed request, a non-200 response or a failed write is retried after a
// 3 second pause; each retry resolves the MP4 address again. The function
// gives up when ctx is cancelled or when the output file cannot be created.
// Failures are reported on standard output; nothing is returned.
func download(url, name string, ctx context.Context) {
	for {
		// Stop retrying once the browser context is gone; otherwise every
		// further attempt would fail in the same way forever.
		if err := ctx.Err(); err != nil {
			fmt.Println("下载", name, "已取消:", err)
			return
		}

		mp4url := getmp4url(url, ctx)
		resp, err := http.Get(mp4url)
		if err != nil {
			// resp is nil here, so it must not be touched.
			fmt.Println("http.get地址", url, "失败,3秒后重试")
			time.Sleep(3 * time.Second)
			continue
		}
		if resp.StatusCode != http.StatusOK {
			// Do not store an error page under an .mp4 name.
			resp.Body.Close()
			fmt.Println("http.get地址", url, "失败,3秒后重试", resp.Status)
			time.Sleep(3 * time.Second)
			continue
		}

		fmt.Println("正在下载:", name)
		out, err := os.Create(name) // created in the current directory
		if err != nil {
			resp.Body.Close()
			fmt.Println("创建文件失败", err)
			return
		}

		// Stream the body to disk, then release both resources before any
		// retry so that they do not pile up across attempts.
		_, err = io.Copy(out, resp.Body)
		resp.Body.Close()
		if closeErr := out.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			fmt.Println("写入文件失败,3秒后重试", err)
			time.Sleep(3 * time.Second)
			os.Remove(name) // drop the partial file before retrying
			continue
		}

		fmt.Println(name, "下载完成")
		return
	}
}
// getmp4url opens url in the browser attached to ctx and returns the src
// attribute of the first <video> element on the page.
//
// It returns the empty string when the page cannot be loaded, when the
// captured HTML cannot be parsed, or when the <video> element has no src
// attribute.
func getmp4url(url string, ctx context.Context) string {
	var res string
	err := chromedp.Run(ctx,
		chromedp.Navigate(url),
		chromedp.Sleep(3*time.Second), // fixed pause before the <video> element is read
		chromedp.OuterHTML("video", &res, chromedp.ByQuery),
		chromedp.Sleep(1*time.Second),
	)
	if err != nil {
		fmt.Println("获取video失败1", err)
		return ""
	}

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(res))
	if err != nil {
		// doc is nil on error; using it would panic.
		fmt.Println("获取video失败2", err)
		return ""
	}

	// The "exists" flag is ignored on purpose: a missing src yields "".
	mp4url, _ := doc.Find("video").Attr("src")
	return mp4url
}
// geturls fetches the series page at url and returns a map from episode file
// name (the trimmed link text plus ".mp4") to the absolute episode page URL
// (the link's href prefixed with "http://www.fun.tv").
//
// The process is terminated with log.Fatal when the page cannot be fetched,
// is not answered with 200 OK, or cannot be parsed.
func geturls(url string) map[string]string {
	m := make(map[string]string)

	res, err := http.Get(url)
	if err != nil {
		// res is nil on error; reading res.Body would panic.
		log.Fatal(err)
	}
	if res.StatusCode != http.StatusOK {
		res.Body.Close()
		log.Fatalf("获取 %s 失败: %s", url, res.Status)
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	// Close explicitly rather than with defer: log.Fatal skips deferred calls.
	res.Body.Close()
	if err != nil {
		log.Fatal(err)
	}

	// Each matching anchor is one episode entry in the play list.
	doc.Find("#playCont > div > div > div > div.torrent-panel > ul > li > a").Each(func(i int, s *goquery.Selection) {
		src, ok := s.Attr("href")
		if !ok {
			// Without an href the entry would point at the bare site root.
			return
		}
		name := strings.TrimSpace(s.Text()) + ".mp4"
		m[name] = "http://www.fun.tv" + src
	})
	return m
}
chromedp.Flag("headless", false) 这里要写 false,这样就会启动一个带界面(GUI)的 Chrome,模拟填写账号密码、鼠标点击登录,之后的流程就跟上面不登录的情况一样了。我试过填写 true(不显示 GUI 界面),登录就不起作用,暂时还没搞清楚原因,我还是小白。