v2.0.0
2.0.0.0 Beyond all doubt, it is the best PHP curl library.
Apache-2.0
The Requires
- php >=5.1.0
- ext-curl *
- electrolinux/phpquery 0.9.*
by Ares
curl curlmulti php-curlmulti
The best php curl library.
Implemented using php-curl's internal I/O events, offering high performance, high universality and high extensibility, which makes it especially suitable for massive task volumes and complex logic., (*1)
PHP: >=5.3, (*2)
composer require ares333/php-curl
Curl::add() adds tasks to the task pool. Curl::start() starts the event loop and blocks. Events (onSuccess, onFail, onInfo, onTask...) are triggered and their callbacks are called on the fly. The loop finishes when all tasks have finished., (*3)
basic, (*4)
use Ares333\Curl\Curl; $curl = new Curl(); $curl->add( array( 'opt' => array( CURLOPT_URL => 'http://baidu.com', CURLOPT_RETURNTRANSFER => true ), 'args' => 'This is user argument' ), function ($r, $args) { echo "Request success for " . $r['info']['url'] . "\n"; echo "\nHeader info:\n"; print_r($r['info']); echo "\nRaw header:\n"; print_r($r['header']); echo "\nArgs:\n"; print_r($args); echo "\n\nBody size:\n"; echo strlen($r['body']) . ' bytes'; echo "\n"; }); $curl->start();
file download, (*5)
use Ares333\Curl\Curl; $curl = new Curl(); $url = 'https://www.baidu.com/img/bd_logo1.png'; $file = __DIR__ . '/download.png'; // $fp is closed automatically on download finished. $fp = fopen($file, 'w'); $curl->add( array( 'opt' => array( CURLOPT_URL => $url, CURLOPT_FILE => $fp, CURLOPT_HEADER => false ), 'args' => array( 'file' => $file ) ), function ($r, $args) { if($r['info']['http_code']==200) { echo "download finished successfully, file=$args[file]\n"; }else{ echo "download failed\n"; } })->start();
task callback, (*6)
Task can be added in task callback. See more details in Curl::$onTask., (*7)
use Ares333\Curl\Toolkit; use Ares333\Curl\Curl; $toolkit = new Toolkit(); $toolkit->setCurl(); $curl = $toolkit->getCurl(); $curl->maxThread = 1; $curl->onTask = function ($curl) { static $i = 0; if ($i >= 50) { return; } $url = 'http://www.baidu.com'; /** @var Curl $curl */ $curl->add( array( 'opt' => array( CURLOPT_URL => $url . '?wd=' . $i ++ ) )); }; $curl->start();
running info, (*8)
use Ares333\Curl\Toolkit; use Ares333\Curl\Curl; $curl = new Curl(); $toolkit = new Toolkit(); $curl->onInfo = array( $toolkit, 'onInfo' ); $curl->maxThread = 2; $url = 'http://www.baidu.com'; for ($i = 0; $i < 100; $i ++) { $curl->add( array( 'opt' => array( CURLOPT_URL => $url . '?wd=' . $i ) )); } $curl->start();
Run it in the CLI and it will produce output in the following format:, (*9)
SPD DWN FNH CACHE RUN ACTIVE POOL QUEUE TASK FAIL 457KB 3MB 24 0 3 3 73 0 100 0
'onInfo' callback will receive all information.The default callback only show part of it., (*10)
SPD:Download speed DWN:Bytes downloaded FNH:Task count which has finished CACHE:Cache count which were used RUN:Task running count ACTIVE:Task count which has IO activities POOL:Task count in task pool QUEUE:Task count which has finished and waiting for onSuccess callback to process TASK:Task count has been added to the task pool FAIL:Task count which has failed after retry.
transparent cache, (*11)
use Ares333\Curl\Toolkit; use Ares333\Curl\Curl; $curl = new Curl(); $toolkit = new Toolkit(); $curl->onInfo = array( $toolkit, 'onInfo' ); $curl->maxThread = 2; $curl->cache['enable'] = true; $curl->cache['dir'] = __DIR__ . '/output/cache'; if (! is_dir($curl->cache['dir'])) { mkdir($curl->cache['dir'], 0755, true); } $url = 'http://www.baidu.com'; for ($i = 0; $i < 20; $i ++) { $curl->add( array( 'opt' => array( CURLOPT_URL => $url . '?wd=' . $i ) )); } $curl->start();
Run the script a second time and it will output:, (*12)
SPD DWN FNH CACHE RUN ACTIVE POOL QUEUE TASK FAIL 0KB 0MB 20 20 0 0 0 0 20 0
The result indicates that all tasks are served from the cache and there is no network activity., (*13)
dynamic tasks, (*14)
use Ares333\Curl\Curl; $curl = new Curl(); $url = 'http://baidu.com'; $curl->add(array( 'opt' => array( CURLOPT_URL => $url ) ), 'cb1'); echo "add $url\n"; $curl->start(); function cb1($r) { echo "finish " . $r['info']['url'] . "\n"; $url = 'http://bing.com'; $r['curl']->add( array( 'opt' => array( CURLOPT_URL => $url ) ), 'cb2'); echo "add $url\n"; } function cb2($r) { echo "finish " . $r['info']['url'] . "\n"; }
Output is as below:, (*15)
add http://baidu.com finish https://www.baidu.com/ add http://bing.com finish http://cn.bing.com/
The finished URL has a '/' suffix because curl has followed the 3xx redirect automatically (Curl::$opt[CURLOPT_FOLLOWLOCATION]=true). Curl::$onTask should be used to deal with massive numbers of tasks., (*16)
public $maxThread = 10
Maximum number of parallel workers, which can be changed on the fly., (*17)
public $maxTry = 3
Max retry before onFail event is triggered., (*18)
public $opt = array ()
Global CURLOPT_* which can be overwritten by task config., (*19)
public $cache = array( 'enable' => false, 'compress' => 0, //0-9,6 is a good choice if you want use compress. 'dir' => null, //Cache dir which must exists. 'expire' => 86400, 'verifyPost' => false //If http post will be part of cache id. );
Global cache config. The cache id is derived from the URL. The config can be overridden by the task config and by the onSuccess callback's return value, both using the same format., (*20)
public $taskPoolType = 'queue'
stack or queue., (*21)
public $onTask = null
Triggered when the number of parallel workers is less than Curl::$maxThread and the task pool is empty. The callback parameter is the current Curl instance., (*22)
public $onInfo = null
The running-state callback is triggered on I/O events, at most once per second. The parameters are as follows: 1. $info Array with two keys, 'all' and 'running'. Key 'running' contains the response header (curl_getinfo()) for each running task. Key 'all' contains global information with the keys below: + $info['all']['downloadSpeed'] Download speed. + $info['all']['bodySize'] Body size downloaded. + $info['all']['headerSize'] Header size downloaded. + $info['all']['activeNum'] Tasks that have I/O activity. + $info['all']['queueNum'] Tasks waiting for onSuccess. + $info['all']['finishNum'] Tasks that have finished. + $info['all']['cacheNum'] Cache hits. + $info['all']['failNum'] Number of tasks that failed after retry. + $info['all']['taskNum'] Task number in the task pool. + $info['all']['taskRunningNum'] Running task number. + $info['all']['taskPoolNum'] Task pool number. + $info['all']['taskFailNum'] Retrying task number. 2. Current Curl instance. 3. Whether this is the last call or not., (*23)
public $onEvent = null
Triggered on IO events.The callback parameter is current Curl instance., (*24)
public $onFail = null
Global callback for failed tasks, which can be overridden per task by the $onFail argument of Curl::add(). The callback receives two parameters: 1. An array with the keys below: + errorCode CURLE_* constants. + errorMsg Error message. + info Response header. + curl Current Curl instance. 2. The $item['args'] value from Curl::add()., (*25)
public function add(array $item, $onSuccess = null, $onFail = null, $ahead = null)
Add one task to the pool. + $item 1. $item['opt']=array() CURLOPT_* for the current task. 2. $item['args'] Parameters for callbacks. 3. $item['cache']=array() Cache config for the current task. + $onSuccess Triggered when the task finishes. + The callback has two parameters: 1. $result Array with the keys below: + $result['info'] Response header. + $result['curl'] Current Curl instance. + $result['body'] Response body. Does not exist for download tasks. + $result['header'] Raw response header. Exists when CURLOPT_HEADER is enabled. + $result['cacheFile'] Exists when the cache is used. 2. Value from $item['args'] + A value can be returned. It must be an array if present. The array keys are as below: + cache Same format as Curl::$cache. This is the last chance to control caching. + $onFail Overrides Curl::$onFail. + $ahead Whether to add the task to the high-priority pool., (*26)
Return: current Curl instance., (*27)
public function start()
Start the event loop and block., (*28)
public function stop()
Stop the event loop and return unprocessed tasks., (*29)
public function parseResponse($response)
Parse http header and body from response., (*30)
public function getCacheFile($url, $post = null)
Generate relative cache path., (*31)
function setCurl($curl = null)
Default Curl instance is used if $curl is null., (*32)
The default instance will initialize Curl::$opt, Curl::$onInfo and Curl::$onFail. The initial values of Curl::$opt are as follows:, (*33)
array( CURLINFO_HEADER_OUT => true, CURLOPT_HEADER => true, CURLOPT_CONNECTTIMEOUT => 10, CURLOPT_TIMEOUT => 30, CURLOPT_AUTOREFERER => true, CURLOPT_USERAGENT => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36', CURLOPT_RETURNTRANSFER => true, CURLOPT_FOLLOWLOCATION => true, CURLOPT_SSL_VERIFYHOST => false, CURLOPT_SSL_VERIFYPEER => false, CURLOPT_MAXREDIRS => 5 )
function onFail($error, $args)
Default fail callback.See Curl::$onFail for details., (*34)
function onInfo($info)
Default info callback.See Curl::onInfo for details., (*35)
The method can be triggered manually with a string parameter which will be added to output buffer.The purpose is to avoid interference of shell control characters., (*36)
function htmlEncode($html, $in = null, $out = 'UTF-8', $mode = 'auto')
Powerful html encoding transformer which can get current encoding automatically and replace html encoding value in <head></head>. Parameters: + $html Html string. + $in Current encoding.It's best to specify one. + $out Target encoding. + $mode auto|iconv|mb_convert_encoding., (*37)
Return: New encoded html., (*38)
function isUrl($url)
Full url or not.Return bool., (*39)
function formatUrl($url)
Replace space,process scheme and hosts and remove anchor etc., (*40)
function buildUrl(array $parse)
Inverse function for parse_url()., (*41)
function uri2url($uri, $urlCurrent)
Transform uri to full url for currentPage.$urlCurrent should be redirected after 3xx., (*42)
function url2uri($url, $urlCurrent)
Transform full url to uri for currentPage.$urlCurrent should be redirected after 3xx., (*43)
function url2dir($url)
Transform a full URL into a directory. $url should be redirected after 3xx., (*44)
function url2absolute($url, $urlCurrent)
Combine a base URL and a relative URL to produce a new absolute URL., (*45)
function urlRemoveDotSegments($path)
Filter out "." and ".." segments from a URL's path and return the result., (*46)
function getCurl()
Return current Curl instance., (*47)
Beyond all doubt, it is the best PHP curl library.
Apache-2.0
curl curlmulti php-curlmulti