Skip to content

Instantly share code, notes, and snippets.

@nine52seven
Forked from luxixing/CurlRoll.php
Created August 29, 2014 12:47

Revisions

  1. 陆西星 created this gist Oct 23, 2013.
    351 changes: 351 additions & 0 deletions CurlRoll.php
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,351 @@
    <?php
    /**
    * Use php curl multi, rolling request url.
    *
    * @author liwanghui@dratio.com
    */

    class CurlRoll
    {
        /**
         * @var int
         * Maximum number of requests kept in flight at once. Setting this too
         * high against a single remote host can easily look like a DDoS attack.
         */
        private $window_size = 5;
        /**
         * @var float
         * Timeout (seconds) passed to curl_multi_select().
         */
        private $timeout = 10;
        /**
         * @var array
         * Queued request objects (stdClass instances built by createRequest()).
         */
        private $requests = array();
        /**
         * @var array
         * Maps the key of each in-flight cURL handle to its index in $requests.
         */
        private $requestMap = array();
        /**
         * @var string|array
         * Callback invoked with ($output, $info, $request) for every finished request.
         */
        private $callback;
        /**
         * @var array
         * Default cURL options applied to every request.
         */
        private $options = array(
            // NOTE(review): this disables TLS peer certificate verification, which
            // allows man-in-the-middle attacks. Kept for backward compatibility;
            // override with `$roll->options = array(CURLOPT_SSL_VERIFYPEER => 1)`.
            CURLOPT_SSL_VERIFYPEER => 0,
            CURLOPT_RETURNTRANSFER => 1, // return the response body instead of printing it
            CURLOPT_CONNECTTIMEOUT => 10, // connection timeout in seconds
            CURLOPT_TIMEOUT => 20, // maximum total time of a transfer in seconds
            CURLOPT_FOLLOWLOCATION => 1, // follow "Location:" redirects
            CURLOPT_MAXREDIRS => 5, // maximum number of redirects to follow
            CURLOPT_HEADER => 0, // when true the response headers are included in the output
            CURLOPT_AUTOREFERER => true, // set the Referer header automatically on redirects
            CURLOPT_ENCODING => "", // "Accept-Encoding" value; empty means every supported encoding
        );
        /**
         * @var array
         * Default HTTP request header lines ("Name: value").
         */
        private $headers = array(
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
            'Connection: close',
            'Cache-Control: max-age=0',
            //'X-FORWARD-FOR:8.8.8.8', // proxy IP address
            //'CLIENT-IP:3.3.3.3', // client IP; REMOTE_ADDR is the more trustworthy value and hard to forge
        );
        /**
         * @var array
         * Pool of User-Agent strings; one is picked at random for requests
         * that do not specify CURLOPT_USERAGENT.
         */
        private static $agent = array(
            // Google Chrome
            'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36',
            'Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
            // Firefox
            'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:25.0) Gecko/20100101 Firefox/25.0',
            'Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0',
            // Internet Explorer
            'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)',
            'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
            'Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)',
            'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; GTB7.4; InfoPath.2; SV1; .NET CLR 3.3.69573; WOW64; en-US)',
        );

        /**
         * @param int $window_size Maximum number of concurrent requests;
         *                         values below 1 fall back to the default of 5.
         */
        public function __construct($window_size = 5)
        {
            $window_size = (int)$window_size;
            $this->window_size = $window_size > 0 ? $window_size : 5;
        }

        /**
         * Drops the references held by this instance.
         *
         * @return void
         */
        public function __destruct()
        {
            unset($this->window_size, $this->callback, $this->options, $this->headers, $this->requests);
        }

        /**
         * Read access to the private configuration properties.
         *
         * @param string $name
         * @return mixed The property value, or null when it is not set.
         */
        public function __get($name)
        {
            return isset($this->{$name}) ? $this->{$name} : null;
        }

        /**
         * Write access to the private configuration properties.
         *
         * "options" and "headers" are merged into the current defaults instead of
         * replacing them: options are merged by cURL option key, headers by
         * header name (a header given here replaces the default of the same name).
         *
         * @param string $name
         * @param mixed $value
         * @return bool
         */
        public function __set($name, $value)
        {
            if ($name == "options") {
                // "+" keeps the integer CURLOPT_* keys (array_merge would renumber them);
                // the left operand wins, so the new values override the defaults.
                $this->options = (array)$value + $this->options;
            } elseif ($name == "headers") {
                // Header lists are numerically indexed, so "+" would merge them by
                // position and silently drop entries; merge by header name instead.
                $this->headers = $this->mergeHeaders($this->headers, (array)$value);
            } else {
                $this->{$name} = $value;
            }
            return true;
        }

        /**
         * Add a GET request that uses the instance-wide headers and options.
         *
         * @param string $url
         * @return bool
         */
        public function add($url)
        {
            $this->requests[] = $this->createRequest($url, 'GET', $this->headers, $this->options);
            return true;
        }

        /**
         * Queue a GET request.
         *
         * @param string $url
         * @param array $headers List of raw header lines (not a key-value array), e.g.
         *                       array(
         *                           "Content-type: text/xml;charset=\"utf-8\"",
         *                           "Accept: text/xml",
         *                           "Cache-Control: no-cache",
         *                           "Authorization: Basic " . base64_encode($credentials)
         *                       )
         * @param array $options cURL options for this request only; they override the defaults.
         * @return bool
         */
        public function get($url, $headers = array(), $options = array())
        {
            $this->requests[] = $this->createRequest($url, "GET", $headers, $options);
            return true;
        }

        /**
         * Queue a POST request.
         *
         * $post_data now has a default so that the optional $headers/$options
         * parameters are really optional (a required parameter after optional
         * ones is deprecated since PHP 8.0). Existing four-argument calls are
         * unaffected.
         *
         * @param string $url
         * @param array $headers List of raw header lines, see get().
         * @param array $options cURL options for this request only.
         * @param array|string $post_data Value for CURLOPT_POSTFIELDS.
         * @return bool
         */
        public function post($url, $headers = array(), $options = array(), $post_data = array())
        {
            $this->requests[] = $this->createRequest($url, "POST", $headers, $options, $post_data);
            return true;
        }

        /**
         * Run every queued request and clear the queue.
         *
         * @param mixed $callback Optional callable($output, $info, $request); replaces
         *                        a previously configured callback when given.
         * @return mixed For exactly one queued request: the callback result, or the raw
         *               curl_exec() output when no callback is set. Otherwise the
         *               boolean result of the rolling run (false when the queue is empty).
         */
        public function execute($callback = null)
        {
            if ($callback) {
                $this->callback = $callback;
            }
            if (count($this->requests) == 1) {
                $ret = $this->single_curl();
            } else {
                $ret = $this->rolling_curl();
            }
            // every execute() starts from an empty queue
            $this->requests = $this->requestMap = array();
            return $ret;
        }

        /**
         * Performs a single curl request.
         *
         * @access private
         * @return mixed Callback result, or the curl_exec() output without a callback.
         */
        private function single_curl()
        {
            $request = array_shift($this->requests);
            $ch = curl_init();
            curl_setopt_array($ch, $this->get_options($request));
            $output = curl_exec($ch);
            $info = curl_getinfo($ch);
            $this->closeHandle($ch);
            if ($this->callback && is_callable($this->callback)) {
                return call_user_func($this->callback, $output, $info, $request);
            }
            return $output;
        }

        /**
         * Performs multiple curl requests, keeping at most window_size in flight.
         *
         * @access private
         * @return bool False when nothing is queued or curl_multi_exec() reported
         *              an error, true otherwise.
         */
        private function rolling_curl()
        {
            $total = count($this->requests);
            if ($total < 1) {
                return false;
            }
            // Use a local window so the configured window_size is not shrunk
            // permanently by a small batch; a window of 1 is valid (sequential).
            $window = max(1, min($total, (int)$this->window_size));

            $master = curl_multi_init();
            $handles = array(); // in-flight handles, needed for cleanup

            // Start the first batch. $i stays in scope afterwards and always
            // points at the next request that has not been started yet.
            for ($i = 0; $i < $window; $i++) {
                $ch = $this->startHandle($master, $i);
                $handles[$this->handleKey($ch)] = $ch;
            }

            do {
                $added = false;
                while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM) ;
                if ($execrun != CURLM_OK) {
                    break;
                }
                // one or more requests completed -- find out which
                while ($done = curl_multi_info_read($master)) {
                    $handle = $done['handle'];
                    $info = curl_getinfo($handle);
                    $output = curl_multi_getcontent($handle);

                    $key = $this->handleKey($handle);
                    $request = isset($this->requestMap[$key]) ? $this->requests[$this->requestMap[$key]] : null;
                    unset($this->requestMap[$key], $handles[$key]);

                    if (is_callable($this->callback)) {
                        call_user_func($this->callback, $output, $info, $request);
                    }

                    // Start the next request before removing the finished handle.
                    // isset() is re-evaluated because the callback may queue more.
                    if (isset($this->requests[$i])) {
                        $ch = $this->startHandle($master, $i);
                        $handles[$this->handleKey($ch)] = $ch;
                        $i++;
                        $added = true;
                    }

                    curl_multi_remove_handle($master, $handle);
                    $this->closeHandle($handle);
                }
                // Block until there is activity; error handling is done by curl_multi_exec
                if ($running && curl_multi_select($master, $this->timeout) === -1) {
                    // select can fail immediately on some platforms; avoid a busy loop
                    usleep(100);
                }
                // $running was measured before the handles above were added, so it can
                // be 0 while freshly added requests still have to be driven.
            } while ($running || $added);

            // only non-empty when the loop was left early because of an error
            foreach ($handles as $ch) {
                curl_multi_remove_handle($master, $ch);
                $this->closeHandle($ch);
            }
            curl_multi_close($master);

            return $execrun == CURLM_OK;
        }

        /**
         * Creates the cURL handle for the queued request at $index, attaches it
         * to the multi handle and records it in the request map.
         *
         * @param mixed $master cURL multi handle
         * @param int $index Index into $this->requests
         * @return mixed The new cURL easy handle
         */
        private function startHandle($master, $index)
        {
            $ch = curl_init();
            curl_setopt_array($ch, $this->get_options($this->requests[$index]));
            curl_multi_add_handle($master, $ch);
            $this->requestMap[$this->handleKey($ch)] = $index;
            return $ch;
        }

        /**
         * Returns a string key identifying a cURL handle.
         *
         * cURL handles are resources before PHP 8 and objects from PHP 8 on;
         * the latter cannot be cast to string.
         *
         * @param mixed $ch
         * @return string
         */
        private function handleKey($ch)
        {
            return is_object($ch) ? 'o' . spl_object_hash($ch) : 'r' . (int)$ch;
        }

        /**
         * Releases a cURL easy handle.
         *
         * Only resource handles (PHP < 8) need an explicit close; object
         * handles are freed automatically once unreferenced.
         *
         * @param mixed $ch
         * @return void
         */
        private function closeHandle($ch)
        {
            if (is_resource($ch)) {
                curl_close($ch);
            }
        }

        /**
         * Merges two lists of raw header lines by header name.
         *
         * A "Name: value" line in $extra replaces the line with the same
         * (case-insensitive) name from $base; lines without a colon are appended.
         *
         * @param array $base
         * @param array $extra
         * @return array Numerically indexed list of header lines
         */
        private function mergeHeaders($base, $extra)
        {
            $merged = array();
            foreach (array((array)$base, (array)$extra) as $list) {
                foreach ($list as $line) {
                    $line = (string)$line;
                    $pos = strpos($line, ':');
                    if ($pos === false) {
                        $merged[] = $line;
                    } else {
                        $merged['h:' . strtolower(trim(substr($line, 0, $pos)))] = $line;
                    }
                }
            }
            return array_values($merged);
        }

        /**
         * Builds the complete cURL option array for one request.
         *
         * Precedence, highest first: request URL / POST data, per-request
         * options, instance-wide default options. CURLOPT_HTTPHEADER is always
         * rebuilt from the default headers merged with the request headers.
         *
         * @access private
         * @param stdClass $request Request object built by createRequest()
         * @return array
         */
        private function get_options($request)
        {
            $options = (array)$this->__get('options');
            // per-request options must win over the defaults, so they go on the left
            if ($request->options) {
                $options = $request->options + $options;
            }
            $options[CURLOPT_URL] = $request->url;
            // cURL defaults to GET; the presence of post data makes the request a POST
            if ($request->post_data) {
                $options[CURLOPT_POST] = true;
                $options[CURLOPT_POSTFIELDS] = $request->post_data;
            }
            $options[CURLOPT_HTTPHEADER] = $this->mergeHeaders($this->__get('headers'), $request->headers);
            return $options;
        }

        /**
         * Builds a request object.
         *
         * A random User-Agent from the pool is added unless one is already set
         * in $options or in the instance-wide options at the time of the call.
         *
         * @param string $url
         * @param string $method "GET" or "POST" (informational; get_options() decides by post data)
         * @param array $headers
         * @param array $options
         * @param array|string $data
         * @return stdClass
         */
        private function createRequest($url, $method, $headers, $options, $data = array())
        {
            $o = new stdClass();
            $o->url = $url;
            $o->method = $method;
            $o->headers = $headers;
            $o->options = $options;
            $o->post_data = $data;
            if (!isset($options[CURLOPT_USERAGENT]) && !isset($this->options[CURLOPT_USERAGENT])) {
                $o->options[CURLOPT_USERAGENT] = self::$agent[array_rand(self::$agent)];
            }
            return $o;
        }
    }