继上一个例子，我们把代码拆分到 fetcher、parser、engine 三个包里，改写如下：
fetcher里新建fetcher.go
package fetcher
import (
	"fmt"
	"io/ioutil"
	"net/http"
)
// Fetch issues an HTTP GET request for url and returns the complete response
// body.
//
// It returns a non-nil error (and a nil body) when the request itself fails,
// when the server answers with a non-2xx status, or when reading the body
// fails.
func Fetch(url string) ([]byte, error) {
	res, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	// An error page (404, 500, ...) is not the content the caller asked for;
	// report it instead of returning its body as if the fetch had succeeded.
	if res.StatusCode < 200 || res.StatusCode > 299 {
		return nil, fmt.Errorf("fetching %s: unexpected status %s", url, res.Status)
	}

	return ioutil.ReadAll(res.Body)
}
parser里新建parser.go
package parser
import (
"regexp"
)
// Parse applies the regular expression re to contents and returns the text
// captured by the first parenthesised group of every match, each entry
// preceded by a newline ("\n").
//
// If re contains no capture group, the whole match is used instead. Parse
// returns the empty string when nothing matches. It panics if re is not a
// valid regular expression, because the pattern is compiled with
// regexp.MustCompile.
func Parse(contents []byte, re string) string {
	rep := regexp.MustCompile(re)

	// Index 1 is the first capture group; index 0 is the whole match. Fall
	// back to the whole match so a pattern without groups cannot cause an
	// index-out-of-range panic.
	group := 0
	if rep.NumSubexp() > 0 {
		group = 1
	}

	// Match on the bytes directly (no string copy of the whole page) and
	// accumulate into one buffer instead of re-allocating a string per match.
	// The -1 asks for all matches rather than a limited number.
	var result []byte
	for _, m := range rep.FindAllSubmatch(contents, -1) {
		result = append(result, '\n')
		result = append(result, m[group]...)
	}
	return string(result)
}
engine里新建engine.go
package engine
import (
"fmt"
"log"
"crawler0.1/fetcher"
"crawler0.1/parser"
)
// Run fetches the page at url, extracts everything matched by the regular
// expression reg, and prints the result to standard output.
//
// If the fetch fails, the error is logged with log.Fatal, which terminates the
// program.
func Run(url string, reg string) {
	body, fetchErr := fetcher.Fetch(url)
	if fetchErr != nil {
		log.Fatal(fetchErr)
	}

	// Parse the fetched content and print whatever matched.
	fmt.Println(parser.Parse(body, reg))
}
main.go里调用engine：
package main
import "crawler0.1/engine"
// main starts the crawler on a single page, using a pattern whose capture
// group is the http:// URL found inside thurl="...".
func main() {
	const (
		pageURL     = "https://www.193291.com/videodetails/50977.html"
		linkPattern = `thurl="(http:\/\/[^"]+)"`
	)
	engine.Run(pageURL, linkPattern)
}