package main
import (
"io/ioutil"
"golang.org/x/net/html/charset"
"golang.org/x/text/encoding"
"bufio"
"golang.org/x/text/transform"
"golang.org/x/text/encoding/unicode"
"log"
"regexp"
"strconv"
"net/http"
"fmt"
)
/* start Fetch.go*/
func Fetch(url string)([]byte ,error){
//resp,err:= http.Get(url)
//
//if err!=nil{
// return nil,err
//}
//
//defer resp.Body.Close()
//if resp.StatusCode != http.StatusOK{
// return nil,fmt.Errorf("Error: status code:%d",resp.StatusCode)
//}
client := &http.Client{}
req, err := http.NewRequest("GET", url, nil)
if err != nil {
log.Fatalln(err)
}
req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36")
resp, err := client.Do(req)
if err != nil {
log.Fatalln(err)
}
defer resp.Body.Close()
bodyReader:= bufio.NewReader(resp.Body)
e:= determineEncoding(bodyReader)
utf8reader:= transform.NewReader(bodyReader,e.NewDecoder())
return ioutil.ReadAll(utf8reader)
}
// determineEncoding guesses the character encoding of the data buffered in r
// by peeking at up to its first 1024 bytes; nothing is consumed from r.
//
// It falls back to UTF-8 only when no bytes at all could be peeked. A short
// document makes Peek return an error together with the bytes that were
// available, and those bytes are still used for detection.
func determineEncoding(r *bufio.Reader) encoding.Encoding {
	// Peek directly on r: wrapping it in another bufio.Reader could consume
	// bytes from r when r's buffer is smaller than the default size.
	head, err := r.Peek(1024)
	if err != nil && len(head) == 0 {
		log.Printf("Fetcher error:%v", err)
		return unicode.UTF8
	}
	e, _, _ := charset.DetermineEncoding(head, "")
	return e
}
/* end Fetch.go*/
/* start Type.go*/
type (
	// Request is one unit of crawl work: a URL together with the function
	// that parses the body fetched from it.
	Request struct {
		Url        string
		ParserFunc func([]byte) ParseResult
	}

	// ParseResult is what a parser returns: follow-up requests and the
	// items it extracted from the page.
	ParseResult struct {
		Requests []Request
		Items    []interface{}
	}
)

// NilParser ignores its input and returns an empty ParseResult.
func NilParser([]byte) ParseResult {
	var empty ParseResult
	return empty
}
/* end Type.go*/
/* start parser/city.go: scrape every user and profile URL listed on a city page */
const cityRe = `<a href="(http://album.zhenai.com/u/[\d]+)" target="_blank">([^<]+)</a>`

// cityRegexp is cityRe compiled once at package initialisation instead of on
// every ParseCity call.
var cityRegexp = regexp.MustCompile(cityRe)

// ParseCity scans a city page for links to user profiles.
//
// For every link matched by cityRe it adds the item "User:<name>" and a
// Request for the profile URL whose parser is PaesrProfile bound to that
// user's name.
func ParseCity(contents []byte) ParseResult {
	result := ParseResult{}
	for _, m := range cityRegexp.FindAllSubmatch(contents, -1) {
		// url and name are declared inside the loop body, so each closure
		// below captures the values of its own iteration.
		url := string(m[1])
		name := string(m[2])
		result.Items = append(result.Items, "User:"+name)
		result.Requests = append(result.Requests, Request{
			Url: url,
			ParserFunc: func(c []byte) ParseResult {
				return PaesrProfile(c, name)
			},
		})
	}
	return result
}
/* end parser/city.go */
/* start parser/citylist.go */
const cityListRe = `(http://www.zhenai.com/zhenghun/[0-9a-z]+)"[^>]*>([^<]+)</a>`

// cityListRegexp is cityListRe compiled once at package initialisation
// instead of on every ParseCityList call.
var cityListRegexp = regexp.MustCompile(cityListRe)

// ParseCityList scans the city index page for links to city pages.
//
// For every link matched by cityListRe it adds the link text as an item and
// a Request for the city URL with ParseCity as its parser. Only the first 10
// matches are used.
func ParseCityList(contents []byte) ParseResult {
	// For testing: limit the crawl to 10 cities.
	const limit = 10

	matches := cityListRegexp.FindAllSubmatch(contents, -1)
	if len(matches) > limit {
		matches = matches[:limit]
	}

	result := ParseResult{}
	for _, m := range matches {
		result.Items = append(result.Items, string(m[2]))
		result.Requests = append(result.Requests, Request{
			Url:        string(m[1]),
			ParserFunc: ParseCity,
		})
	}
	return result
}
/* end parser/citylist.go */
/* start profile.go */
// Profile holds the fields extracted from a single user profile page.
type Profile struct {
	Name          string
	Age           int
	Marry         string
	Constellation string
	Height        int
	Weight        int
	Salary        string
}

// String renders the profile as a single line containing the name, marital
// status, age, height (cm) and weight (kg).
func (p Profile) String() string {
	// The "cm" figure is the height; it used to print the age a second time.
	return p.Name + " " + p.Marry + strconv.Itoa(p.Age) + "olds " + strconv.Itoa(p.Height) + "cm " + strconv.Itoa(p.Weight) + "kg "
}
/* end profile.go */
/* start parser/profile.go */
// Patterns for the attribute tags on a profile page. Each one captures the
// value of a single attribute in its first submatch.
var (
	ageRe = regexp.MustCompile(`<div class="m-btn purple" data-v-bff6f798>([\d]+)岁</div>`)
	marry = regexp.MustCompile(`<div class="m-btn purple" data-v-bff6f798>(已婚)</div>`)
	// [^<]+ keeps the capture inside one tag; a greedy .* would run across
	// sibling tags when the page markup sits on a single line.
	constellation = regexp.MustCompile(`<div class="m-btn purple" data-v-bff6f798>([^<]+)座</div>`)
	// The height needs a capture group like the other numeric fields; the
	// former literal `160cm` had none, so no height was ever extracted.
	height = regexp.MustCompile(`<div class="m-btn purple" data-v-bff6f798>([\d]+)cm</div>`)
	weight = regexp.MustCompile(`<div class="m-btn purple" data-v-bff6f798>([\d]+)kg</div>`)
	salary = regexp.MustCompile(`<div class="m-btn purple" data-v-bff6f798>月收入:([^<]+)</div>`)
)
// PaesrProfile builds a Profile from the HTML of a user profile page.
//
// name is handed down from the previous level (the city page that linked to
// this profile). Numeric fields are set only when the matched text converts
// to an integer; otherwise they stay zero. Marry is "已婚" when the married
// tag is found and "未婚" otherwise. The result carries the profile as its
// only item and no follow-up requests.
func PaesrProfile(contents []byte, name string) ParseResult {
	p := Profile{Name: name}

	if n, err := strconv.Atoi(extractString(contents, ageRe)); err == nil {
		p.Age = n
	}
	if n, err := strconv.Atoi(extractString(contents, height)); err == nil {
		p.Height = n
	}
	if n, err := strconv.Atoi(extractString(contents, weight)); err == nil {
		p.Weight = n
	}

	p.Salary = extractString(contents, salary)
	p.Constellation = extractString(contents, constellation)

	p.Marry = "已婚"
	if extractString(contents, marry) == "" {
		p.Marry = "未婚"
	}

	return ParseResult{Items: []interface{}{p}}
}
// extractString returns the text of the first capture group of the first
// match of re in contents. It returns "" when re does not match or has no
// capture group.
func extractString(contents []byte, re *regexp.Regexp) string {
	groups := re.FindSubmatch(contents)
	if len(groups) < 2 {
		return ""
	}
	return string(groups[1])
}
/* end parser/profile.go */
/* start engine.go */
// Run crawls breadth-first starting from seeds.
//
// Requests are taken from the front of a FIFO queue: the URL is fetched, the
// body is passed to the request's ParserFunc, the requests it returns are
// appended to the queue and the items it returns are printed. A request whose
// fetch fails, or that has no parser, is logged and skipped. Run returns when
// the queue is empty.
func Run(seeds ...Request) {
	// Copy the seeds so the queue never aliases the caller's slice.
	requests := append([]Request(nil), seeds...)

	for len(requests) > 0 {
		r := requests[0]
		requests = requests[1:]

		if r.ParserFunc == nil {
			// Calling a nil func would panic; there is nothing to do with the body anyway.
			log.Printf("Engine: no parser for url %s, skipping", r.Url)
			continue
		}

		fmt.Printf("Fetching %s\n", r.Url)
		body, err := Fetch(r.Url)
		if err != nil {
			log.Printf("Fetcher:error fetching url %s, : %v", r.Url, err)
			continue
		}

		parseResult := r.ParserFunc(body)
		requests = append(requests, parseResult.Requests...)
		for _, item := range parseResult.Items {
			// %v prints any item type; %s misformats values that are not
			// strings or Stringers.
			fmt.Printf("Got item %v\n", item)
		}
	}
}
/* end engine.go */
func main(){
Run(Request{
Url:"http://www.zhenai.com/zhenghun",
ParserFunc:ParseCityList,
})
//paseTest()
}
// paseTest is a manual check of the profile patterns: it reads a saved
// profile page from test.html, extracts the same fields as PaesrProfile
// (except the name) and prints the resulting Profile.
func paseTest() {
	contents, err := ioutil.ReadFile("test.html")
	if err != nil {
		log.Printf("paseTest: reading test.html: %v", err)
		return
	}

	profile := Profile{}
	// A field is stored only when the conversion succeeded (err == nil); the
	// checks used to be inverted, so no parsed value was ever kept.
	if n, err := strconv.Atoi(extractString(contents, ageRe)); err == nil {
		profile.Age = n
	}
	if n, err := strconv.Atoi(extractString(contents, height)); err == nil {
		profile.Height = n
	}
	if n, err := strconv.Atoi(extractString(contents, weight)); err == nil {
		profile.Weight = n
	}

	profile.Salary = extractString(contents, salary)
	profile.Constellation = extractString(contents, constellation)
	if extractString(contents, marry) == "" {
		profile.Marry = "未婚"
	} else {
		profile.Marry = "已婚"
	}

	fmt.Printf("%s", profile)
}
以上就是本文的全部内容,希望本文的内容对大家的学习或者工作能带来一定的帮助,也希望大家多多支持码农网。
猜你喜欢:
- Gson与List<T>对象间的相亲之旅
- 相亲指南:怎么带妹子来一场硬核黑客的约会
- 17000台工业主机宕机,让他开始调查“工业相亲对象”黑历史
- 机器学习实战教程(二):决策树基础篇之让我们从相亲说起
- 程序员相亲,介绍自己:我是做底层架构的!女方:啥时候到中高级
- 爬虫需谨慎,那些你不知道的爬虫与反爬虫套路!
本站部分资源来源于网络,本站转载出于传递更多信息之目的,版权归原作者或者来源机构所有,如转载稿涉及版权问题,请联系我们。
Computational Geometry
Mark de Berg、Otfried Cheong、Marc van Kreveld、Mark Overmars / Springer / 2008-4-16 / USD 49.95
This well-accepted introduction to computational geometry is a textbook for high-level undergraduate and low-level graduate courses. The focus is on algorithms and hence the book is well suited for st......一起来看看 《Computational Geometry》 这本书的介绍吧!