想爬取电视剧的下载地址,但发现这些地址是由 JavaScript 动态生成的。
用 http.Get 获取到的 res.Body 只是网页源码里能看到的静态内容,其中并不包含形如 thunder://QUFtYWduZXQlM0ElM0Z4dCUzRHVybiUzQWJ0aWglM0ExYjAxODBhODM0MGEwNTIzMTllZTUyMDA1MzA3ODE5OGIyZmZlYjQ2JTI2YW1wJTNCZG4lM0QlRTclQkElQTYlRTUlQUUlOUElRTclQUMlQUMxJUU5JTlCJTg2Lm1wNFpa 的下载地址。怎么爬取这样的迅雷链接呢?
可以用 chromedp:它会驱动浏览器执行页面上的 JavaScript,因此拿到的是渲染之后的 HTML,而不是原始网页源码。
// This command uses chromedp to render a JavaScript-driven page and, with
// goquery, prints the link targets found inside a specific element.
package main
import (
	"context"
	"fmt"
	"log"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/chromedp/chromedp"
)
func main() {
// create context
ctx, cancel := chromedp.NewContext(context.Background())
defer cancel()
// run task list
var res string
err := chromedp.Run(ctx,
chromedp.Navigate(`https://www.993dy.com/vod-detail-id-78571.html`),
chromedp.InnerHTML("#ul1",&res),
)
if err != nil {
log.Fatal(err)
}
//配合goquery可以筛选出地址
doc, err := goquery.NewDocumentFromReader(strings.NewReader(res))
if err != nil {
log.Fatal(err)
}
// Find the review items
doc.Find("div:first-child").Each(func(i int, s *goquery.Selection) {
// For each item found, get the title
title,_ := s.Find("a").Attr("href")
fmt.Printf("%s\n", title)
}