@@ -0,0 +1,351 @@
<?php
/**
* Rolling-window concurrent URL fetcher built on PHP's curl_multi API.
*
* @author liwanghui@dratio.com
*/
/**
 * Rolling-window HTTP client built on curl_multi.
 *
 * Requests are queued with add()/get()/post() and sent by execute(). A single
 * queued request is sent with a plain curl handle; several requests are sent
 * concurrently, keeping at most `window_size` transfers in flight and starting
 * the next queued request as soon as one finishes.
 *
 * The optional callback receives ($output, $info, $request) for every
 * completed transfer, where $info is the curl_getinfo() array and $request is
 * the stdClass built by createRequest().
 */
class CurlRoll
{
    /**
     * @var int
     * Maximum number of concurrent requests. Setting this too high while
     * targeting a single remote host can easily look like a DDoS attack.
     */
    private $window_size = 5;

    /**
     * @var float
     * Timeout, in seconds, passed to curl_multi_select().
     */
    private $timeout = 10;

    /**
     * @var array
     * Queued request objects (stdClass instances built by createRequest()).
     */
    private $requests = array();

    /**
     * @var array
     * Maps the key of each in-flight curl handle to its index in $requests.
     */
    private $requestMap = array();

    /**
     * @var string|array
     * Callback invoked with ($output, $info, $request) for each result.
     */
    private $callback;

    /**
     * @var array
     * Default curl options applied to every request.
     */
    private $options = array(
        // NOTE(review): this disables TLS peer certificate verification, which
        // exposes HTTPS requests to man-in-the-middle attacks. Kept as-is
        // because callers may rely on it; confirm whether it is still wanted.
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_RETURNTRANSFER => 1,    // return the response body instead of printing it
        CURLOPT_CONNECTTIMEOUT => 10,   // connection timeout
        CURLOPT_TIMEOUT        => 20,   // maximum total execution time
        CURLOPT_FOLLOWLOCATION => 1,    // follow "Location" redirects
        CURLOPT_MAXREDIRS      => 5,    // redirect limit when FOLLOWLOCATION is on
        CURLOPT_HEADER         => 0,    // when true, response headers are included in the output
        CURLOPT_AUTOREFERER    => true, // set the Referer header automatically on redirects
        CURLOPT_ENCODING       => "",   // "Accept-Encoding"; empty means all supported encodings
    );

    /**
     * @var array
     * Default HTTP request header lines ("Name: value"), not a key/value map.
     */
    private $headers = array(
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
        'Connection: close',
        'Cache-Control: max-age=0',
        //'X-FORWARD-FOR:8.8.8.8', // proxy IP address
        //'CLIENT-IP:3.3.3.3',     // client IP
    );

    /**
     * @var array
     * User-Agent pool; one entry is picked at random for each request that
     * does not set CURLOPT_USERAGENT itself.
     */
    private static $agent = array(
        // Google Chrome
        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36',
        'Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
        // Firefox
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:25.0) Gecko/20100101 Firefox/25.0',
        'Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0',
        // Internet Explorer
        'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)',
        'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
        'Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)',
        'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; GTB7.4; InfoPath.2; SV1; .NET CLR 3.3.69573; WOW64; en-US)',
    );

    /**
     * @param int $window_size Maximum number of concurrent requests; values
     *                         below 1 fall back to the default of 5.
     */
    public function __construct($window_size = 5)
    {
        $size = (int)$window_size;
        $this->window_size = $size > 0 ? $size : 5;
    }

    /**
     * Releases the queued state when the object is destroyed.
     *
     * @return void
     */
    public function __destruct()
    {
        unset(
            $this->window_size,
            $this->callback,
            $this->options,
            $this->headers,
            $this->requests,
            $this->requestMap
        );
    }

    /**
     * Read access to the private configuration properties.
     *
     * @param string $name
     * @return mixed The property value, or null when it is unset.
     */
    public function __get($name)
    {
        return isset($this->{$name}) ? $this->{$name} : null;
    }

    /**
     * Write access to the private configuration properties.
     *
     * "options" and "headers" are merged into the defaults instead of
     * replacing them: supplied options win over defaults with the same key,
     * and supplied header lines replace default lines with the same name.
     *
     * @param string $name
     * @param mixed  $value
     * @return bool
     */
    public function __set($name, $value)
    {
        if ($name === 'options') {
            $this->options = (array)$value + $this->options;
        } elseif ($name === 'headers') {
            // Headers are a list, so the array union operator would merge by
            // numeric index and clobber unrelated lines; merge by name instead.
            $this->headers = $this->mergeHeaders($this->headers, (array)$value);
        } else {
            $this->{$name} = $value;
        }
        return true;
    }

    /**
     * Queue a GET request that uses the current default headers and options.
     *
     * @param string $url
     * @return bool
     */
    public function add($url)
    {
        $this->requests[] = $this->createRequest($url, 'GET', $this->headers, $this->options);
        return true;
    }

    /**
     * Queue a GET request.
     *
     * @param string $url
     * @param array  $headers List of raw header lines (not a key/value map),
     *                        e.g. array("Accept: text/xml", "Pragma: no-cache").
     * @param array  $options Extra curl options for this request only.
     * @return bool
     */
    public function get($url, $headers = array(), $options = array())
    {
        $this->requests[] = $this->createRequest($url, 'GET', $headers, $options);
        return true;
    }

    /**
     * Queue a POST request.
     *
     * @param string       $url
     * @param array        $headers   List of raw header lines for this request.
     * @param array        $options   Extra curl options for this request only.
     * @param array|string $post_data Request body (CURLOPT_POSTFIELDS). Has a
     *                                default so no required parameter follows
     *                                optional ones.
     * @return bool
     */
    public function post($url, $headers = array(), $options = array(), $post_data = array())
    {
        $this->requests[] = $this->createRequest($url, 'POST', $headers, $options, $post_data);
        return true;
    }

    /**
     * Send every queued request, then clear the queue.
     *
     * @param mixed $callback Optional callable replacing the stored callback.
     * @return mixed For a single queued request: the callback's return value,
     *               or the raw response when no callback is set. Otherwise the
     *               boolean result of the concurrent run (false when the queue
     *               is empty or curl_multi_exec() reports an error).
     */
    public function execute($callback = null)
    {
        if ($callback) {
            $this->callback = $callback;
        }

        $ret = count($this->requests) == 1
            ? $this->single_curl()
            : $this->rolling_curl();

        // The queue is single-use: drop all requests once they have been sent.
        $this->requests = $this->requestMap = array();
        return $ret;
    }

    /**
     * Performs a single curl request.
     *
     * @return mixed The callback's return value, or the curl_exec() result
     *               when no callable callback is set.
     */
    private function single_curl()
    {
        $request = array_shift($this->requests);

        $ch = curl_init();
        curl_setopt_array($ch, $this->get_options($request));
        $output = curl_exec($ch);
        $info = curl_getinfo($ch);
        $this->closeHandle($ch);

        if ($this->callback && is_callable($this->callback)) {
            return call_user_func($this->callback, $output, $info, $request);
        }
        return $output;
    }

    /**
     * Performs the queued requests concurrently with a rolling window.
     *
     * @return bool False when the queue is empty or curl_multi_exec() fails,
     *              true otherwise.
     */
    private function rolling_curl()
    {
        $queued = count($this->requests);
        if ($queued === 0) {
            return false;
        }

        // Work on a local copy so one small batch does not permanently shrink
        // the configured window, and never let the window drop below 1.
        $window = max(1, min((int)$this->window_size, $queued));

        $master = curl_multi_init();
        for ($next = 0; $next < $window; $next++) {
            $this->startRequest($master, $next);
        }

        $succeeded = true;
        do {
            do {
                $status = curl_multi_exec($master, $running);
            } while ($status === CURLM_CALL_MULTI_PERFORM);

            if ($status !== CURLM_OK) {
                $succeeded = false;
                break;
            }

            $started = false;
            while ($done = curl_multi_info_read($master)) {
                $ch = $done['handle'];
                $info = curl_getinfo($ch);
                $output = curl_multi_getcontent($ch);

                $key = $this->handleKey($ch);
                $request = isset($this->requestMap[$key])
                    ? $this->requests[$this->requestMap[$key]]
                    : null;
                unset($this->requestMap[$key]);

                $callback = $this->callback;
                if (is_callable($callback)) {
                    call_user_func($callback, $output, $info, $request);
                }

                // Start the next queued request before removing the finished
                // handle. The queue is re-checked after the callback so that
                // requests added by the callback are picked up too.
                if (isset($this->requests[$next])) {
                    $this->startRequest($master, $next);
                    $next++;
                    $started = true;
                }

                curl_multi_remove_handle($master, $ch);
                $this->closeHandle($ch);
            }

            // Wait for activity only when nothing new was started; a freshly
            // added handle needs curl_multi_exec() first.
            if ($running > 0 && !$started) {
                if (curl_multi_select($master, $this->timeout) === -1) {
                    // select can fail immediately; avoid a busy loop.
                    usleep(1000);
                }
            }
            // $running can be 0 right after the last in-flight transfer ended
            // even though a replacement was just added, so check both.
        } while ($running > 0 || $started);

        if (is_resource($master)) {
            curl_multi_close($master);
        }
        return $succeeded;
    }

    /**
     * Builds the full curl option array for one request.
     *
     * Per-request options take precedence over the class defaults, while the
     * URL, POST settings and merged header list are always set from the
     * request itself.
     *
     * @param stdClass $request Request object built by createRequest().
     * @return array
     */
    private function get_options($request)
    {
        $custom = $request->options ? (array)$request->options : array();
        // With the array union operator the left operand wins, so the
        // per-request options must come first to override the defaults.
        $options = $custom + (array)$this->options;

        $options[CURLOPT_URL] = $request->url;

        // curl defaults to GET. Treat the request as POST when it was queued
        // as one, or when it carries a body.
        $isPost = strtoupper(trim((string)$request->method)) === 'POST';
        if ($isPost || $request->post_data) {
            $options[CURLOPT_POST] = true;
            $options[CURLOPT_POSTFIELDS] = $request->post_data ? $request->post_data : '';
        }

        $extraHeaders = $request->headers ? (array)$request->headers : array();
        $options[CURLOPT_HTTPHEADER] = $this->mergeHeaders((array)$this->headers, $extraHeaders);

        return $options;
    }

    /**
     * Builds the request object stored in the queue.
     *
     * @param string       $url
     * @param string       $method  "GET" or "POST".
     * @param array        $headers Raw header lines for this request.
     * @param array        $options curl options for this request.
     * @param array|string $data    POST body, if any.
     * @return stdClass
     */
    private function createRequest($url, $method, $headers, $options, $data = array())
    {
        $o = new stdClass();
        $o->url = $url;
        $o->method = $method;
        $o->headers = $headers;
        $o->options = $options;
        $o->post_data = $data;
        // Pick a random User-Agent unless the caller supplied one.
        if (!isset($options[CURLOPT_USERAGENT])) {
            $o->options[CURLOPT_USERAGENT] = self::$agent[array_rand(self::$agent)];
        }
        return $o;
    }

    /**
     * Creates a curl handle for the queued request at $index, adds it to the
     * multi handle and records it in the request map.
     *
     * @param mixed $master curl multi handle.
     * @param int   $index  Index into $this->requests.
     * @return void
     */
    private function startRequest($master, $index)
    {
        $ch = curl_init();
        curl_setopt_array($ch, $this->get_options($this->requests[$index]));
        curl_multi_add_handle($master, $ch);
        $this->requestMap[$this->handleKey($ch)] = $index;
    }

    /**
     * Returns a string key identifying a curl handle.
     *
     * curl handles are resources before PHP 8 and objects from PHP 8 on, and
     * objects of that class cannot be cast to string.
     *
     * @param mixed $ch curl easy handle.
     * @return string
     */
    private function handleKey($ch)
    {
        return is_resource($ch) ? (string)$ch : spl_object_hash($ch);
    }

    /**
     * Closes a curl easy handle where closing is meaningful (resource handles
     * on PHP < 8); object handles are released by the engine.
     *
     * @param mixed $ch curl easy handle.
     * @return void
     */
    private function closeHandle($ch)
    {
        if (is_resource($ch)) {
            curl_close($ch);
        }
    }

    /**
     * Merges two lists of raw header lines.
     *
     * Lines in $extra replace lines in $base that carry the same header name
     * (case-insensitive); everything else in $base is kept, in order, ahead of
     * the $extra lines.
     *
     * @param array $base
     * @param array $extra
     * @return array
     */
    private function mergeHeaders($base, $extra)
    {
        $overridden = array();
        foreach ($extra as $line) {
            $overridden[$this->headerName($line)] = true;
        }

        $merged = array();
        foreach ($base as $line) {
            if (!isset($overridden[$this->headerName($line)])) {
                $merged[] = $line;
            }
        }

        return array_merge($merged, array_values($extra));
    }

    /**
     * Extracts the lower-cased header name from a "Name: value" line.
     *
     * @param string $line
     * @return string
     */
    private function headerName($line)
    {
        $line = (string)$line;
        $colon = strpos($line, ':');
        $name = $colon === false ? $line : substr($line, 0, $colon);
        return strtolower(trim($name));
    }
}