instructions

In this article, the example crawls the list of images in the cartoon hand-drawn column (Lazy Gallery home > PNG material > cartoon hand-drawn), opens the list pages and downloads the pictures using goroutines; the pictures are saved under ./data/img/ (the directory is created automatically if it does not exist).

code

Install dependencies

# Just install goquery, a Go implementation of jQuery, used to extract HTML node information
go get github.com/PuerkitoBio/goquery
Copy the code

The crawler code

package main

import (
	"bytes"
	"fmt"
	"github.com/PuerkitoBio/goquery"
	"io"
	"io/ioutil"
	"net/http"
	"os"
	"path"
	"strconv"
	"strings"
	"time"
)

// imgInfo describes one image discovered on a list page.
type imgInfo struct {
	url  string // absolute URL of the image file
	name string // file name to save under (alt text, or the URL's base name)
}

var pageChan = make(chan string.1000)
var htmlChan = make(chan string.1000)
var imgChan = make(chan imgInfo, 1000)
var isFinish = make(chan bool)

// grab an image of a particular page on a particular site
func main(a) {
	// Destination address, page number spelled last
	baseUrl := "https://www.lanrentuku.com/pngsucai/cate_katongshouhui_"
	// Just climb a few pages to test the water
	for i := 1; i <= 2; i++ {
		pageChan <- baseUrl + strconv.Itoa(i)
	}

	// The coroutine gets the content of the web page and places it in the channel
	go getPage()
	// The coroutine takes the image link from the web page content and places it in the channel
	go parsePage()
	// The coroutine downloads the image locally according to the image link in the channel
	go downloadImg()

	<-isFinish
	fmt.Println("All done!")}// checkErr handles possible error messages
// checkErr panics on any non-nil error, keeping the happy path short.
// Acceptable for a one-shot script; a library would return the error instead.
func checkErr(err error) {
	if err != nil {
		panic(err)
	}
}

func getPage(a) {
	for page := range pageChan {
		fmt.Println("Currently fetching page :" + page)
		r, _ := http.Get(page)
		body, _ := ioutil.ReadAll(r.Body)
		htmlChan <- string(body)

		r.Body.Close()
		time.Sleep(time.Millisecond * 300)}}func parsePage(a) {
	for html := range htmlChan {
		dom, err := goquery.NewDocumentFromReader(strings.NewReader(html))
		checkErr(err)
		// Traverse the page's target image node to get the image URL and file name
		dom.Find(".list-pic .item-img img").Each(func(i int, selection *goquery.Selection) {
			imgUrl, ok := selection.Attr("src")
			if ok {
				imgName, ok := selection.Attr("alt")
				if! ok { imgName = path.Base(imgUrl) }else {
					imgName += path.Ext(imgUrl)
				}

				imgChan <- imgInfo{name: imgName, url: imgUrl}
			}
		})
	}
}

func downloadImg(a) {
	for {
		select {
		case img := <-imgChan:
			/ / picture url: "https://d1.lanrentuku.com/upload/cover/5fac9b67d30bd.jpg"
			// Create directory
			err := os.MkdirAll("./data/img", os.ModePerm)
			checkErr(err)

			// Save the image
			fmt.Println("Saving pictures :", img.url)
			f, err := os.Create("./data/img/" + img.name)
			checkErr(err)
			r, _ := http.Get(img.url)
			body, _ := ioutil.ReadAll(r.Body)
			io.Copy(f, bytes.NewReader(body))

			// Release resources
			r.Body.Close()
			f.Close()
			// Take it to a certain extent
			time.Sleep(time.Millisecond * 300)

		case <-time.After(time.Second * 15) :// Timeout terminates the program
			isFinish <- true}}}Copy the code

Page Information Example

Download good pictures

reference

  • The use of goquery