To put together three original posts I had to test this code first and see whether I could gather enough words. I have previously used PHP to batch-scrape Taobao data and our company's internal interface data, and it works well enough. PHP does this with the curl_multi_* family of functions.

PHP batch request code demo

/**
 * Batch HTTP requests with curl_multi
 *
 * @author Zhenxun Du <5552123@qq.com>
 * @date   2017-8-9 17:08:32
 * @param  array $curl_data
 * @param  int   $read_timeout
 * @param  int   $connect_timeout
 * @return array
 */
function my_curl_multi($curl_data, $read_timeout = 30, $connect_timeout = 30)
{
    // Create the child handles and add them to the multi handle
    $mh = curl_multi_init();
    $curl_array = array();
    foreach ($curl_data as $k => $info) {
        $curl_array[$k] = curl_init($info['url']);
        curl_setopt($curl_array[$k], CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl_array[$k], CURLOPT_HEADER, 0);

        if ($read_timeout) {
            curl_setopt($curl_array[$k], CURLOPT_TIMEOUT, $read_timeout);
        }
        if ($connect_timeout) {
            curl_setopt($curl_array[$k], CURLOPT_CONNECTTIMEOUT, $connect_timeout);
        }

        if (!empty($info['headers'])) {
            curl_setopt($curl_array[$k], CURLOPT_HTTPHEADER, $info['headers']);
        }
        // Send the POST body (CURLOPT_POSTFIELDS implicitly switches the request to POST)
        if (!empty($info['post_fields'])) {
            curl_setopt($curl_array[$k], CURLOPT_POSTFIELDS, $info['post_fields']);
        }

        curl_multi_add_handle($mh, $curl_array[$k]);
    }


    // Execute all the handles
    $running = null;
    do {
        $mrc = curl_multi_exec($mh, $running);
    } while ($mrc == CURLM_CALL_MULTI_PERFORM);


    while ($running && $mrc == CURLM_OK) {
        if (curl_multi_select($mh) == -1) {
            usleep(100);
        }
        do {
            $mrc = curl_multi_exec($mh, $running);
        } while ($mrc == CURLM_CALL_MULTI_PERFORM);
    }

    // Get the execution result
    $response = [];
    foreach ($curl_array as $key => $val) {
        $response[$key] = curl_multi_getcontent($val);
    }

    // Remove and close the child handles
    foreach ($curl_array as $key => $val) {
        curl_multi_remove_handle($mh, $val);
        curl_close($val);
    }

    // Close the multi handle
    curl_multi_close($mh);

    return $response;
}

// Usage
// Build the request list
$curls_data = [
    ['url' => 'http://www.baidu.com'],
    ['url' => 'http://www.58.com'],
    ['url' => 'http://xs25.cn'],
];
// Batch request: 180s read timeout, 60s connect timeout
$multi_arr = my_curl_multi($curls_data, 180, 60);


I suspect most people learning Go already know PHP fairly well, and PHP is certainly flexible to use, so let's see whether the same thing is easier and more efficient to write in Go. (Admittedly, this article is partly an exercise in scraping together enough words for an "original" post…)

Go goroutine batch request: first version

package main

import (
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"time"
)

func main() {
	start := time.Now()
	ch := make(chan string)

	var urls = []string{"http://www.baidu.com", "http://www.qq.com", "http://www.58.com", "http://xs25.cn"}
	for _, url := range urls {
		go fetch(url, ch)
	}

	// Receive one result per goroutine; this blocks until all fetches report back
	for range urls {
		fmt.Println(<-ch)
	}
	fmt.Printf("%.2fs elapsed\n", time.Since(start).Seconds())

}

// fetch requests a single URL, counts the body bytes, and reports a timing summary on ch
func fetch(url string, ch chan<- string) {
	start := time.Now()
	res, err := http.Get(url)
	if err != nil {
		ch <- fmt.Sprint(err)
		return
	}
	defer res.Body.Close()
	nbytes, err := io.Copy(ioutil.Discard, res.Body)
	if err != nil {
		ch <- fmt.Sprintf("while reading %s: %v", url, err)
		return
	}
	secs := time.Since(start).Seconds()
	ch <- fmt.Sprintf("%.2fs %7d %s", secs, nbytes, url)

}
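
One thing the Go version drops compared with the PHP one is timeouts: http.Get has no deadline at all, so a single slow host can hang a goroutine indefinitely. A minimal sketch of adding one through a custom http.Client (the 30-second value is my own assumption, chosen to mirror the PHP defaults):

package main

import (
	"fmt"
	"net/http"
	"time"
)

// client applies an overall deadline (connect + headers + body) to every request
var client = &http.Client{Timeout: 30 * time.Second}

func main() {
	res, err := client.Get("http://www.baidu.com")
	if err != nil {
		// a timeout surfaces here as an ordinary error
		fmt.Println(err)
		return
	}
	defer res.Body.Close()
	fmt.Println(res.Status)
}

Swap client.Get for http.Get in either version below and the whole batch gains a deadline.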

Go goroutine batch request: second version

package main

import (
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"sync"
	"time"
)

func main() {
	var urls = []string{"http://www.baidu.com", "https://www.qq.com", "https://lf.58.com", "http://xs25.cn"}
	start := time.Now()
	wg := sync.WaitGroup{}
	wg.Add(len(urls))
	for _, val := range urls {
		go func(url string) {
			defer wg.Done() // decrement the counter even on an early error return
			start := time.Now()
			res, err := http.Get(url)
			if err != nil {
				fmt.Println(err)
				return
			}
			defer res.Body.Close()
			nbytes, err := io.Copy(ioutil.Discard, res.Body)
			if err != nil {
				fmt.Printf("while reading %s:%v\n", url, err)
				return
			}
			fmt.Printf("%.2fs %7d %s\n", time.Since(start).Seconds(), nbytes, url)
			wg.Done()
		}(val)
	}

	wg.Wait()

	fmt.Printf("%.2fs elapsed \n", time.Since(start).Seconds())

}


Sample output:

0.12s  156965 www.baidu.com
0.44s  235408 www.qq.com
0.51s   63305 www.xs25.cn
0.56s  183305 lf.58.com

A few more words at the end

Both of the above approaches make batch requests with Go goroutines. Isn't that much simpler than PHP?

The first version uses goroutines plus a channel (chan), which gets unwieldy at high volume, so the second version uses goroutines plus sync.WaitGroup. Note that neither version limits how many requests run at once; a sketch of capping concurrency follows below.
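
If the URL list really is large, spawning one goroutine per URL can overwhelm the target site or your own machine. A common fix is a buffered channel used as a semaphore; here is a minimal sketch under that assumption (the limit of 10 in-flight requests is arbitrary):

package main

import (
	"fmt"
	"net/http"
	"sync"
)

func main() {
	urls := []string{"http://www.baidu.com", "https://www.qq.com", "https://lf.58.com"}

	sem := make(chan struct{}, 10) // at most 10 requests in flight
	var wg sync.WaitGroup

	for _, url := range urls {
		wg.Add(1)
		go func(url string) {
			defer wg.Done()
			sem <- struct{}{}        // acquire a slot; blocks if 10 are running
			defer func() { <-sem }() // release the slot when done
			res, err := http.Get(url)
			if err != nil {
				fmt.Println(err)
				return
			}
			res.Body.Close()
			fmt.Println(res.Status, url)
		}(url)
	}
	wg.Wait()
}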

A WaitGroup holds an internal counter that starts at 0 and exposes three methods to control it: Add(), Done(), and Wait(). Add(n) adds n to the counter, Done() decrements it by 1, and Wait() blocks until the counter drops back to 0.
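
Here is a tiny self-contained sketch of those three methods interacting; the three workers and their sleep times are invented purely for illustration:

package main

import (
	"fmt"
	"sync"
	"time"
)

func main() {
	var wg sync.WaitGroup
	wg.Add(3) // counter: 0 -> 3, one slot per worker

	for i := 1; i <= 3; i++ {
		go func(id int) {
			defer wg.Done() // counter -1 when this worker finishes
			time.Sleep(time.Duration(id*100) * time.Millisecond)
			fmt.Println("worker", id, "done")
		}(i)
	}

	wg.Wait() // blocks until the counter is back to 0
	fmt.Println("all workers finished")
}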