From 2b4abf6338e0bb50ea45130286d3ed1793c9a945 Mon Sep 17 00:00:00 2001
From: Joshua Ramon Enslin
Date: Mon, 14 Dec 2020 02:01:53 +0100
Subject: [PATCH] Add function for running multiple curl queries simultaneously

---
 MD_STD.php | 89 ++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 79 insertions(+), 10 deletions(-)

diff --git a/MD_STD.php b/MD_STD.php
index 69b475e..8fa0ae8 100644
--- a/MD_STD.php
+++ b/MD_STD.php
@@ -174,6 +174,38 @@ final class MD_STD {
 
     }
 
+    /**
+     * Initializes a curl handle preconfigured with the shared request defaults.
+     *
+     * @param string  $url     URL to query.
+     * @param integer $timeout Timeout in milliseconds, applied both to the
+     *                         connection phase and to the whole transfer.
+     *
+     * @return resource Curl handle (a \CurlHandle object as of PHP 8).
+     *
+     * @throws \Exception If the curl handle cannot be created.
+     */
+    public static function curl_init(string $url, int $timeout) {
+
+        // \curl_init() returns false on failure; fail loudly instead of
+        // passing false on to every \curl_setopt() call below.
+        if (($curl = \curl_init()) === false) {
+            throw new \Exception("Failed to initialize curl handle");
+        }
+
+        \curl_setopt($curl, CURLOPT_URL, $url);
+        \curl_setopt($curl, CURLOPT_HEADER, false); // Body only, no response headers
+        \curl_setopt($curl, CURLOPT_CONNECTTIMEOUT_MS, $timeout); // Timeout in milliseconds
+        \curl_setopt($curl, CURLOPT_TIMEOUT_MS, $timeout); // Timeout in milliseconds
+        \curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
+        \curl_setopt($curl, CURLOPT_AUTOREFERER, true); // Set Referer when following redirects
+        // Identify as a desktop browser.
+        \curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2) Gecko/20100101 Firefox/10.0.2');
+
+        return $curl;
+
+    }
+
     /**
      * Wrapper for curling contents from the web.
      *
@@ -184,17 +216,9 @@ final class MD_STD {
      */
     public static function runCurl(string $url, int $timeout = 1200):string {
 
-        $curl = \curl_init();
-
-        \curl_setopt($curl, CURLOPT_URL, $url);
-        \curl_setopt($curl, CURLOPT_HEADER, false);
-        \curl_setopt($curl, CURLOPT_CONNECTTIMEOUT_MS, $timeout); //timeout in seconds
-        \curl_setopt($curl, CURLOPT_TIMEOUT_MS, $timeout); //timeout in seconds
+        $curl = self::curl_init($url, $timeout);
         \curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
-        \curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
-        // \curl_setopt($curl, CURLOPT_COOKIESESSION, true);
-        \curl_setopt($curl, CURLOPT_AUTOREFERER, true);
-        \curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2) Gecko/20100101 Firefox/10.0.2');
+
         $result = \curl_exec($curl);
 
         // if ($err = curl_errno($curl)) echo $err;
@@ -206,6 +230,51 @@ final class MD_STD {
 
     }
 
+    /**
+     * Wrapper for curling multiple pages from the web at once and returning their contents.
+     * Adapted from hushuilong's comment at https://www.php.net/manual/de/function.curl-multi-init.php#105252.
+     *
+     * @param array   $urls    URLs to query; the array keys are preserved in the result.
+     * @param integer $timeout Timeout in milliseconds, applied to each request.
+     *
+     * @return array Response bodies, indexed by the keys of $urls.
+     *
+     * @throws \Exception If the multi handle cannot be set up.
+     */
+    public static function runCurlMulti(array $urls, int $timeout = 1200):array {
+
+        if (!($mh = \curl_multi_init())) {
+            throw new \Exception("Failed to set up multi handle");
+        }
+
+        $curl_array = [];
+        foreach ($urls as $i => $url) {
+            $curl_array[$i] = self::curl_init($url, $timeout);
+            \curl_setopt($curl_array[$i], CURLOPT_RETURNTRANSFER, true);
+            \curl_multi_add_handle($mh, $curl_array[$i]);
+        }
+
+        // Run all transfers, blocking on socket activity instead of sleep-polling.
+        $running = 0;
+        do {
+            $status = \curl_multi_exec($mh, $running);
+            if ($running > 0 && \curl_multi_select($mh, 1.0) === -1) {
+                \usleep(10000); // select() failed: back off briefly instead of spinning
+            }
+        } while ($running > 0 && $status === CURLM_OK); // Also stop on a multi-stack error
+
+        // Collect the bodies and detach each handle from the multi handle.
+        $res = [];
+        foreach ($curl_array as $i => $handle) {
+            $res[$i] = \curl_multi_getcontent($handle);
+            \curl_multi_remove_handle($mh, $handle);
+        }
+        \curl_multi_close($mh);
+
+        return $res;
+
+    }
+
     /**
      * Function lang_getfrombrowser gets the browser language based on HTTP headers.
      *