diff --git a/src/MD_STD.php b/src/MD_STD.php index dd95b24..1a98fc6 100644 --- a/src/MD_STD.php +++ b/src/MD_STD.php @@ -775,13 +775,55 @@ final class MD_STD { throw new Exception("Failed to get temporary file location"); } - $fp = \fopen($tmp_file, 'w'); + // Check remote headers - if (!($ch = \curl_init($url))) { + if (!($ch_headers = \curl_init())) { throw new Exception("Failed to initialize curl for $url"); }; - \curl_setopt($ch, CURLOPT_FILE, $fp); + \curl_setopt_array($ch_headers, [ + CURLOPT_URL => $url, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_CONNECTTIMEOUT_MS => 10000, + CURLOPT_TIMEOUT_MS => 10000, + CURLOPT_FOLLOWLOCATION => true, + CURLOPT_AUTOREFERER => true, + CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2) Gecko/20100101 Firefox/10.0.2', + CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0, + CURLOPT_TCP_FASTOPEN => true, + CURLOPT_HEADER => true, + CURLOPT_NOBODY => true, + ]); + \curl_exec($ch_headers); + + // If the content length is too high and the request went out to a trusted source, + // return the content type as stated by the remote server. + + if (curl_getinfo($ch_headers, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T) > 2000000) { + if (($url_parsed = parse_url($url)) && str_contains($url_parsed['host'], 'archive.org') && !empty($remote_content_type = curl_getinfo($ch_headers, CURLINFO_CONTENT_TYPE))) { + return $remote_content_type; + } + } + + // Validate that the remote file really is of the correct content type. + + $fp = \fopen($tmp_file, 'w'); + + if (!($ch = \curl_init())) { + throw new Exception("Failed to initialize curl for $url"); + }; + + \curl_setopt_array($ch, [ + CURLOPT_URL => $url, + CURLOPT_CONNECTTIMEOUT_MS => 10000, + CURLOPT_TIMEOUT_MS => 10000, + CURLOPT_FOLLOWLOCATION => true, + CURLOPT_AUTOREFERER => true, + CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2) Gecko/20100101 Firefox/10.0.2', + CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0, + CURLOPT_TCP_FASTOPEN => true, + CURLOPT_FILE => $fp, + ]); \curl_exec($ch); $mime_type = MD_STD::mime_content_type($tmp_file);