From e85d47dfd4eee646aa00f60b21bbb076d9f19b26 Mon Sep 17 00:00:00 2001 From: wn_ Date: Fri, 22 Dec 2023 16:51:04 +0000 Subject: [PATCH 1/8] Use Guzzle --- classes/UrlHelper.php | 500 ++++++------------ composer.json | 3 +- composer.lock | 20 +- vendor/composer/installed.json | 18 +- vendor/composer/installed.php | 14 +- vendor/guzzlehttp/guzzle/CHANGELOG.md | 13 +- vendor/guzzlehttp/guzzle/README.md | 4 +- vendor/guzzlehttp/guzzle/UPGRADING.md | 14 +- vendor/guzzlehttp/guzzle/composer.json | 4 +- .../guzzle/src/Cookie/CookieJar.php | 2 +- .../guzzle/src/Cookie/SetCookie.php | 4 +- .../guzzle/src/Handler/CurlFactory.php | 2 +- .../guzzlehttp/guzzle/src/RequestOptions.php | 4 +- vendor/guzzlehttp/guzzle/src/Utils.php | 15 +- 14 files changed, 218 insertions(+), 399 deletions(-) diff --git a/classes/UrlHelper.php b/classes/UrlHelper.php index dbbde55e6..b7583c1e9 100644 --- a/classes/UrlHelper.php +++ b/classes/UrlHelper.php @@ -17,7 +17,19 @@ class UrlHelper { static string $fetch_last_modified; static string $fetch_effective_url; static string $fetch_effective_ip_addr; - static bool $fetch_curl_used; + + private static ?GuzzleHttp\ClientInterface $client = null; + + private static function get_client(): GuzzleHttp\ClientInterface { + if (self::$client == null) { + self::$client = new GuzzleHttp\Client([ + GuzzleHttp\RequestOptions::COOKIES => false, + GuzzleHttp\RequestOptions::PROXY => Config::get(Config::HTTP_PROXY) ?: null, + ]); + } + + return self::$client; + } /** * @param array $parts @@ -184,57 +196,32 @@ class UrlHelper { /** * @return false|string */ - static function resolve_redirects(string $url, int $timeout, int $nest = 0) { + static function resolve_redirects(string $url, int $timeout) { $span = Tracer::start(__METHOD__); $span->setAttribute('func.args', json_encode(func_get_args())); + $client = self::get_client(); - // too many redirects - if ($nest > 10) { + try { + $response = $client->request('HEAD', $url, [ + GuzzleHttp\RequestOptions::CONNECT_TIMEOUT => $timeout ?: Config::get(Config::FILE_FETCH_CONNECT_TIMEOUT), + GuzzleHttp\RequestOptions::TIMEOUT => $timeout ?: Config::get(Config::FILE_FETCH_TIMEOUT), + GuzzleHttp\RequestOptions::ALLOW_REDIRECTS => ['max' => 10, 'track_redirects' => true, 'http_errors' => false], + GuzzleHttp\RequestOptions::HEADERS => [ + 'User-Agent' => Config::get_user_agent(), + 'Connection' => 'close', + ], + ]); + } catch (GuzzleHttp\Exception\GuzzleException $ex) { + // TODO: catch just the "too many redirects" exception, and set a different 'error' for general issues $span->setAttribute('error', 'too many redirects'); $span->end(); return false; } - $context_options = array( - 'http' => array( - 'header' => array( - 'Connection: close' - ), - 'method' => 'HEAD', - 'timeout' => $timeout, - 'protocol_version'=> 1.1) - ); - - if (Config::get(Config::HTTP_PROXY)) { - $context_options['http']['request_fulluri'] = true; - $context_options['http']['proxy'] = Config::get(Config::HTTP_PROXY); - } - - $context = stream_context_create($context_options); - - // PHP 8 changed the second param from int to bool, but we still support PHP >= 7.4.0 - // @phpstan-ignore-next-line - $headers = get_headers($url, 0, $context); - - if (is_array($headers)) { - $headers = array_reverse($headers); // last one is the correct one - - foreach($headers as $header) { - if (stripos($header, 'Location:') === 0) { - $url = self::rewrite_relative($url, trim(substr($header, strlen('Location:')))); - - return self::resolve_redirects($url, $timeout, $nest + 1); - } - } - - $span->end(); - return $url; - } - - $span->setAttribute('error', 'request failed'); + // If a history header value doesn't exist there was no redirection and the original URL is fine. + $history_header = $response->getHeader(GuzzleHttp\RedirectMiddleware::HISTORY_HEADER); $span->end(); - // request failed? - return false; + return ($history_header ? end($history_header) : $url); } /** @@ -244,13 +231,14 @@ class UrlHelper { // TODO: max_size currently only works for CURL transfers // TODO: multiple-argument way is deprecated, first parameter is a hash now public static function fetch($options /* previously: 0: $url , 1: $type = false, 2: $login = false, 3: $pass = false, - 4: $post_query = false, 5: $timeout = false, 6: $timestamp = 0, 7: $useragent = false*/) { + 4: $post_query = false, 5: $timeout = false, 6: $timestamp = 0, 7: $useragent = false, 8: $retry_once_request = false */) { + $span = Tracer::start(__METHOD__); + $span->setAttribute('func.args', json_encode(func_get_args())); self::$fetch_last_error = ""; self::$fetch_last_error_code = -1; self::$fetch_last_error_content = ""; self::$fetch_last_content_type = ""; - self::$fetch_curl_used = false; self::$fetch_last_modified = ""; self::$fetch_effective_url = ""; self::$fetch_effective_ip_addr = ""; @@ -258,7 +246,7 @@ class UrlHelper { if (!is_array($options)) { // falling back on compatibility shim - $option_names = [ "url", "type", "login", "pass", "post_query", "timeout", "last_modified", "useragent" ]; + $option_names = [ "url", "type", "login", "pass", "post_query", "timeout", "last_modified", "useragent", "retry-once-request" ]; $tmp = []; for ($i = 0; $i < func_num_args(); $i++) { @@ -275,14 +263,12 @@ class UrlHelper { "post_query" => @func_get_arg(4), "timeout" => @func_get_arg(5), "timestamp" => @func_get_arg(6), - "useragent" => @func_get_arg(7) + "useragent" => @func_get_arg(7), + "retry-once-request" => @func_get_arg(8), ); */ } + $url = $options["url"]; - - $span = Tracer::start(__METHOD__); - $span->setAttribute('func.args', json_encode(func_get_args())); - $type = isset($options["type"]) ? $options["type"] : false; $login = isset($options["login"]) ? $options["login"] : false; $pass = isset($options["pass"]) ? $options["pass"] : false; @@ -303,8 +289,7 @@ class UrlHelper { $url = self::validate($url, true); if (!$url) { - self::$fetch_last_error = "Requested URL failed extended validation."; - + self::$fetch_last_error = 'Requested URL failed extended validation.'; $span->setAttribute('error', self::$fetch_last_error); $span->end(); return false; @@ -313,319 +298,146 @@ class UrlHelper { $url_host = parse_url($url, PHP_URL_HOST); $ip_addr = gethostbyname($url_host); - if (!$ip_addr || strpos($ip_addr, "127.") === 0) { + if (!$ip_addr || strpos($ip_addr, '127.') === 0) { self::$fetch_last_error = "URL hostname failed to resolve or resolved to a loopback address ($ip_addr)"; - $span->setAttribute('error', self::$fetch_last_error); $span->end(); return false; } - if (function_exists('curl_init') && !ini_get("open_basedir")) { + $req_options = [ + GuzzleHttp\RequestOptions::CONNECT_TIMEOUT => $timeout ?: Config::get(Config::FILE_FETCH_CONNECT_TIMEOUT), + GuzzleHttp\RequestOptions::TIMEOUT => $timeout ?: Config::get(Config::FILE_FETCH_TIMEOUT), + GuzzleHttp\RequestOptions::ALLOW_REDIRECTS => $followlocation ? ['max' => 20, 'track_redirects' => true] : false, + GuzzleHttp\RequestOptions::HEADERS => [ + 'User-Agent' => $useragent ?: Config::get_user_agent(), + ], + ]; - self::$fetch_curl_used = true; + if ($last_modified && !$post_query) + $req_options[GuzzleHttp\RequestOptions::HEADERS]['If-Modified-Since'] = $last_modified; - $ch = curl_init($url); + if ($http_accept) + $req_options[GuzzleHttp\RequestOptions::HEADERS]['Accept'] = $http_accept; - if (!$ch) { - self::$fetch_last_error = "curl_init() failed"; - $span->setAttribute('error', self::$fetch_last_error); - $span->end(); - return false; + if ($http_referrer) + $req_options[GuzzleHttp\RequestOptions::HEADERS]['Referer'] = $http_referrer; + + if ($login && $pass) + $req_options[GuzzleHttp\RequestOptions::AUTH] = [$login, $pass]; + + if ($post_query) + $req_options[GuzzleHttp\RequestOptions::FORM_PARAMS] = $post_query; + + if ($max_size) { + $req_options[GuzzleHttp\RequestOptions::PROGRESS] = function($download_size, $downloaded, $upload_size, $uploaded) use(&$max_size, $url) { + //Debug::log("[curl progressfunction] $downloaded $max_size", Debug::$LOG_EXTENDED); + + if ($downloaded > $max_size) { + Debug::log("[UrlHelper] fetch error: max size of $max_size bytes exceeded when downloading $url . Aborting.", Debug::LOG_VERBOSE); + throw new \LengthException("Download exceeded size limit"); + } + }; + + # Alternative/supplement to `progress` checking + $req_options[GuzzleHttp\RequestOptions::ON_HEADERS] = function(Psr\Http\Message\ResponseInterface $response) use(&$max_size, $url) { + $content_length = $response->getHeaderLine('Content-Length'); + if ($content_length > $max_size) { + Debug::log("[UrlHelper] fetch error: server indicated (via 'Content-Length: {$content_length}') max size of $max_size bytes " . + "would be exceeded when downloading $url . Aborting.", Debug::LOG_VERBOSE); + throw new \LengthException("Server sent 'Content-Length' exceeding download limit"); + } + }; + } + + $client = self::get_client(); + + try { + if (($options['retry-once-request'] ?? null) instanceof Psr\Http\Message\RequestInterface) { + $response = $client->send($options['retry-once-request']); + } else { + $response = $client->request($post_query ? 'POST' : 'GET', $url, $req_options); } + } catch (\LengthException $ex) { + // 'Content-Length' exceeded the download limit + self::$fetch_last_error = (string) $ex; + $span->setAttribute('error', self::$fetch_last_error); + $span->end(); + return false; + } catch (GuzzleHttp\Exception\GuzzleException $ex) { + self::$fetch_last_error = (string) $ex; - $curl_http_headers = []; + if ($ex instanceof GuzzleHttp\Exception\RequestException) { + if ($ex instanceof GuzzleHttp\Exception\BadResponseException) { + // 4xx or 5xx + self::$fetch_last_error_code = $ex->getResponse()->getStatusCode(); - if ($last_modified && !$post_query) - array_push($curl_http_headers, "If-Modified-Since: $last_modified"); + # TODO: Retry with CURLAUTH_ANY if the response code is 403? Was this actually an issue before? + # https://docs.guzzlephp.org/en/stable/faq.html#how-can-i-add-custom-curl-options + // if (self::$fetch_last_error_code === 403) {} - if ($http_accept) - array_push($curl_http_headers, "Accept: " . $http_accept); + self::$fetch_last_content_type = $ex->getResponse()->getHeaderLine('content-type'); - if (count($curl_http_headers) > 0) - curl_setopt($ch, CURLOPT_HTTPHEADER, $curl_http_headers); + if ($type && strpos(self::$fetch_last_content_type, "$type") === false) + self::$fetch_last_error_content = (string) $ex->getResponse()->getBody(); + } elseif (array_key_exists('errno', $ex->getHandlerContext())) { + $errno = (int) $ex->getHandlerContext()['errno']; - curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout ? $timeout : Config::get(Config::FILE_FETCH_CONNECT_TIMEOUT)); - curl_setopt($ch, CURLOPT_TIMEOUT, $timeout ? $timeout : Config::get(Config::FILE_FETCH_TIMEOUT)); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $followlocation); - curl_setopt($ch, CURLOPT_MAXREDIRS, 20); - curl_setopt($ch, CURLOPT_BINARYTRANSFER, true); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_HEADER, true); - curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_BASIC); - curl_setopt($ch, CURLOPT_USERAGENT, $useragent ? $useragent : Config::get_user_agent()); - curl_setopt($ch, CURLOPT_ENCODING, ""); - curl_setopt($ch, CURLOPT_COOKIEJAR, "/dev/null"); - - if ($http_referrer) - curl_setopt($ch, CURLOPT_REFERER, $http_referrer); - - if ($max_size) { - curl_setopt($ch, CURLOPT_NOPROGRESS, false); - curl_setopt($ch, CURLOPT_BUFFERSIZE, 16384); // needed to get 5 arguments in progress function? - - // holy shit closures in php - // download & upload are *expected* sizes respectively, could be zero - curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function($curl_handle, $download_size, $downloaded, $upload_size, $uploaded) use(&$max_size, $url) { - //Debug::log("[curl progressfunction] $downloaded $max_size", Debug::$LOG_EXTENDED); - - if ($downloaded > $max_size) { - Debug::log("[UrlHelper] fetch error: curl reached max size of $max_size bytes downloading $url, aborting.", Debug::LOG_VERBOSE); - return 1; - } - - return 0; - }); - - } - - if (Config::get(Config::HTTP_PROXY)) { - curl_setopt($ch, CURLOPT_PROXY, Config::get(Config::HTTP_PROXY)); - } - - if ($post_query) { - curl_setopt($ch, CURLOPT_POST, true); - curl_setopt($ch, CURLOPT_POSTFIELDS, $post_query); - } - - if ($login && $pass) - curl_setopt($ch, CURLOPT_USERPWD, "$login:$pass"); - - $ret = @curl_exec($ch); - $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE); - - // CURLAUTH_BASIC didn't work, let's retry with CURLAUTH_ANY in case it's actually something - // unusual like NTLM... - if ($http_code == 403 && $login && $pass) { - curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_ANY); - - $ret = @curl_exec($ch); - } - - if (curl_errno($ch) === 23 || curl_errno($ch) === 61) { - curl_setopt($ch, CURLOPT_ENCODING, 'none'); - $ret = @curl_exec($ch); - } - - $headers_length = curl_getinfo($ch, CURLINFO_HEADER_SIZE); - $headers = explode("\r\n", substr($ret, 0, $headers_length)); - $contents = substr($ret, $headers_length); - - foreach ($headers as $header) { - if (strstr($header, ": ") !== false) { - list ($key, $value) = explode(": ", $header); - - if (strtolower($key) == "last-modified") { - self::$fetch_last_modified = $value; + // By default, all supported encoding types are sent via `Accept-Encoding` and decoding of + // responses with `Content-Encoding` is automatically attempted. If this fails, we do a + // single retry with `Accept-Encoding: none` to try and force an unencoded response. + if (($errno === \CURLE_WRITE_ERROR || $errno === \CURLE_BAD_CONTENT_ENCODING) && + !array_key_exists('retry-once-request', $options)) { + $options['retry-once-request'] = $ex->getRequest()->withHeader('Accept-Encoding', 'none'); + return self::fetch($options); } } - - if (substr(strtolower($header), 0, 7) == 'http/1.') { - self::$fetch_last_error_code = (int) substr($header, 9, 3); - self::$fetch_last_error = $header; - } - } - - $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE); - self::$fetch_last_content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE); - - self::$fetch_effective_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); - - if (!self::validate(self::$fetch_effective_url, true)) { - self::$fetch_last_error = "URL received after redirection failed extended validation."; - - $span->setAttribute('error', self::$fetch_last_error); - $span->end(); - return false; - } - - self::$fetch_effective_ip_addr = gethostbyname(parse_url(self::$fetch_effective_url, PHP_URL_HOST)); - - if (!self::$fetch_effective_ip_addr || strpos(self::$fetch_effective_ip_addr, "127.") === 0) { - self::$fetch_last_error = "URL hostname received after redirection failed to resolve or resolved to a loopback address (".self::$fetch_effective_ip_addr.")"; - - $span->setAttribute('error', self::$fetch_last_error); - $span->end(); - return false; - } - - self::$fetch_last_error_code = $http_code; - - if ($http_code != 200 || $type && strpos(self::$fetch_last_content_type, "$type") === false) { - - if (curl_errno($ch) != 0) { - self::$fetch_last_error .= "; " . curl_errno($ch) . " " . curl_error($ch); - } else { - self::$fetch_last_error = "HTTP Code: $http_code "; - } - - self::$fetch_last_error_content = $contents; - curl_close($ch); - - $span->setAttribute('error', self::$fetch_last_error); - $span->end(); - return false; - } - - if (!$contents) { - if (curl_errno($ch) === 0) { - self::$fetch_last_error = 'Successful response, but no content was received.'; - } else { - self::$fetch_last_error = curl_errno($ch) . " " . curl_error($ch); - } - curl_close($ch); - - $span->setAttribute('error', self::$fetch_last_error); - $span->end(); - return false; - } - - curl_close($ch); - - $is_gzipped = RSSUtils::is_gzipped($contents); - - if ($is_gzipped && is_string($contents)) { - $tmp = @gzdecode($contents); - - if ($tmp) $contents = $tmp; } + $span->setAttribute('error', self::$fetch_last_error); $span->end(); - return $contents; - } else { - - self::$fetch_curl_used = false; - - if ($login && $pass){ - $url_parts = array(); - - preg_match("/(^[^:]*):\/\/(.*)/", $url, $url_parts); - - $pass = urlencode($pass); - - if ($url_parts[1] && $url_parts[2]) { - $url = $url_parts[1] . "://$login:$pass@" . $url_parts[2]; - } - } - - // TODO: should this support POST requests or not? idk - - $context_options = array( - 'http' => array( - 'header' => array( - 'Connection: close' - ), - 'method' => 'GET', - 'ignore_errors' => true, - 'timeout' => $timeout ? $timeout : Config::get(Config::FILE_FETCH_TIMEOUT), - 'protocol_version'=> 1.1) - ); - - if (!$post_query && $last_modified) - array_push($context_options['http']['header'], "If-Modified-Since: $last_modified"); - - if ($http_accept) - array_push($context_options['http']['header'], "Accept: $http_accept"); - - if ($http_referrer) - array_push($context_options['http']['header'], "Referer: $http_referrer"); - - if (Config::get(Config::HTTP_PROXY)) { - $context_options['http']['request_fulluri'] = true; - $context_options['http']['proxy'] = Config::get(Config::HTTP_PROXY); - } - - $context = stream_context_create($context_options); - - $old_error = error_get_last(); - - self::$fetch_effective_url = self::resolve_redirects($url, $timeout ? $timeout : Config::get(Config::FILE_FETCH_CONNECT_TIMEOUT)); - - if (!self::validate(self::$fetch_effective_url, true)) { - self::$fetch_last_error = "URL received after redirection failed extended validation."; - - $span->setAttribute('error', self::$fetch_last_error); - $span->end(); - return false; - } - - self::$fetch_effective_ip_addr = gethostbyname(parse_url(self::$fetch_effective_url, PHP_URL_HOST)); - - if (!self::$fetch_effective_ip_addr || strpos(self::$fetch_effective_ip_addr, "127.") === 0) { - self::$fetch_last_error = "URL hostname received after redirection failed to resolve or resolved to a loopback address (".self::$fetch_effective_ip_addr.")"; - - $span->setAttribute('error', self::$fetch_last_error); - $span->end(); - return false; - } - - $data = @file_get_contents($url, false, $context); - - if ($data === false) { - self::$fetch_last_error = "'file_get_contents' failed."; - - $span->setAttribute('error', self::$fetch_last_error); - $span->end(); - return false; - } - - foreach ($http_response_header as $header) { - if (strstr($header, ": ") !== false) { - list ($key, $value) = explode(": ", $header); - - $key = strtolower($key); - - if ($key == 'content-type') { - self::$fetch_last_content_type = $value; - // don't abort here b/c there might be more than one - // e.g. if we were being redirected -- last one is the right one - } else if ($key == 'last-modified') { - self::$fetch_last_modified = $value; - } else if ($key == 'location') { - self::$fetch_effective_url = $value; - } - } - - if (substr(strtolower($header), 0, 7) == 'http/1.') { - self::$fetch_last_error_code = (int) substr($header, 9, 3); - self::$fetch_last_error = $header; - } - } - - if (self::$fetch_last_error_code != 200) { - $error = error_get_last(); - - if (($error['message'] ?? '') != ($old_error['message'] ?? '')) { - self::$fetch_last_error .= "; " . $error["message"]; - } - - self::$fetch_last_error_content = $data; - - $span->setAttribute('error', self::$fetch_last_error); - $span->end(); - return false; - } - - if ($data) { - $is_gzipped = RSSUtils::is_gzipped($data); - - if ($is_gzipped) { - $tmp = @gzdecode($data); - - if ($tmp) $data = $tmp; - } - - $span->end(); - return $data; - } else { - self::$fetch_last_error = 'Successful response, but no content was received.'; - - $span->setAttribute('error', self::$fetch_last_error); - $span->end(); - return false; - } + return false; } + + // Keep setting expected 'fetch_last_error_code' and 'fetch_last_error' values + self::$fetch_last_error_code = $response->getStatusCode(); + self::$fetch_last_error = "HTTP/{$response->getProtocolVersion()} {$response->getStatusCode()} {$response->getReasonPhrase()}"; + self::$fetch_last_modified = $response->getHeaderLine('last-modified'); + self::$fetch_last_content_type = $response->getHeaderLine('content-type'); + + // If a history header value doesn't exist there was no redirection and the original URL is fine. + $history_header = $response->getHeader(GuzzleHttp\RedirectMiddleware::HISTORY_HEADER); + self::$fetch_effective_url = $history_header ? end($history_header) : $url; + + if (!self::validate(self::$fetch_effective_url, true)) { + self::$fetch_last_error = "URL received after redirection failed extended validation."; + $span->setAttribute('error', self::$fetch_last_error); + $span->end(); + return false; + } + + self::$fetch_effective_ip_addr = gethostbyname(parse_url(self::$fetch_effective_url, PHP_URL_HOST)); + + if (!self::$fetch_effective_ip_addr || strpos(self::$fetch_effective_ip_addr, '127.') === 0) { + self::$fetch_last_error = 'URL hostname received after redirection failed to resolve or resolved to a loopback address (' . + self::$fetch_effective_ip_addr . ')'; + $span->setAttribute('error', self::$fetch_last_error); + $span->end(); + return false; + } + + $body = (string) $response->getBody(); + + if (!$body) { + self::$fetch_last_error = 'Successful response, but no content was received.'; + $span->setAttribute('error', self::$fetch_last_error); + $span->end(); + return false; + } + + $span->end(); + return $body; } /** diff --git a/composer.json b/composer.json index 69b3a6dc1..0ce390fa8 100644 --- a/composer.json +++ b/composer.json @@ -24,7 +24,8 @@ "j4mie/idiorm": "dev-master", "open-telemetry/exporter-otlp": "^1.0", "php-http/guzzle7-adapter": "^1.0", - "soundasleep/html2text": "^2.1" + "soundasleep/html2text": "^2.1", + "guzzlehttp/guzzle": "^7.0" }, "require-dev": { "phpstan/phpstan": "1.10.3", diff --git a/composer.lock b/composer.lock index 598462e90..4c45934d3 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "d65a2e896d59d3d603fd6cda0db3b646", + "content-hash": "a0465ea624d9e79bc5d8b04a345b1ad6", "packages": [ { "name": "beberlei/assert", @@ -261,16 +261,16 @@ }, { "name": "guzzlehttp/guzzle", - "version": "7.8.0", + "version": "7.8.1", "source": { "type": "git", "url": "https://github.com/guzzle/guzzle.git", - "reference": "1110f66a6530a40fe7aea0378fe608ee2b2248f9" + "reference": "41042bc7ab002487b876a0683fc8dce04ddce104" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/guzzle/guzzle/zipball/1110f66a6530a40fe7aea0378fe608ee2b2248f9", - "reference": "1110f66a6530a40fe7aea0378fe608ee2b2248f9", + "url": "https://api.github.com/repos/guzzle/guzzle/zipball/41042bc7ab002487b876a0683fc8dce04ddce104", + "reference": "41042bc7ab002487b876a0683fc8dce04ddce104", "shasum": "" }, "require": { @@ -285,11 +285,11 @@ "psr/http-client-implementation": "1.0" }, "require-dev": { - "bamarni/composer-bin-plugin": "^1.8.1", + "bamarni/composer-bin-plugin": "^1.8.2", "ext-curl": "*", "php-http/client-integration-tests": "dev-master#2c025848417c1135031fdf9c728ee53d0a7ceaee as 3.0.999", "php-http/message-factory": "^1.1", - "phpunit/phpunit": "^8.5.29 || ^9.5.23", + "phpunit/phpunit": "^8.5.36 || ^9.6.15", "psr/log": "^1.1 || ^2.0 || ^3.0" }, "suggest": { @@ -367,7 +367,7 @@ ], "support": { "issues": "https://github.com/guzzle/guzzle/issues", - "source": "https://github.com/guzzle/guzzle/tree/7.8.0" + "source": "https://github.com/guzzle/guzzle/tree/7.8.1" }, "funding": [ { @@ -383,7 +383,7 @@ "type": "tidelift" } ], - "time": "2023-08-27T10:20:53+00:00" + "time": "2023-12-03T20:35:24+00:00" }, { "name": "guzzlehttp/promises", @@ -4405,5 +4405,5 @@ "prefer-lowest": false, "platform": [], "platform-dev": [], - "plugin-api-version": "2.3.0" + "plugin-api-version": "2.6.0" } diff --git a/vendor/composer/installed.json b/vendor/composer/installed.json index d09e9c70f..429e6e42f 100644 --- a/vendor/composer/installed.json +++ b/vendor/composer/installed.json @@ -340,17 +340,17 @@ }, { "name": "guzzlehttp/guzzle", - "version": "7.8.0", - "version_normalized": "7.8.0.0", + "version": "7.8.1", + "version_normalized": "7.8.1.0", "source": { "type": "git", "url": "https://github.com/guzzle/guzzle.git", - "reference": "1110f66a6530a40fe7aea0378fe608ee2b2248f9" + "reference": "41042bc7ab002487b876a0683fc8dce04ddce104" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/guzzle/guzzle/zipball/1110f66a6530a40fe7aea0378fe608ee2b2248f9", - "reference": "1110f66a6530a40fe7aea0378fe608ee2b2248f9", + "url": "https://api.github.com/repos/guzzle/guzzle/zipball/41042bc7ab002487b876a0683fc8dce04ddce104", + "reference": "41042bc7ab002487b876a0683fc8dce04ddce104", "shasum": "" }, "require": { @@ -365,11 +365,11 @@ "psr/http-client-implementation": "1.0" }, "require-dev": { - "bamarni/composer-bin-plugin": "^1.8.1", + "bamarni/composer-bin-plugin": "^1.8.2", "ext-curl": "*", "php-http/client-integration-tests": "dev-master#2c025848417c1135031fdf9c728ee53d0a7ceaee as 3.0.999", "php-http/message-factory": "^1.1", - "phpunit/phpunit": "^8.5.29 || ^9.5.23", + "phpunit/phpunit": "^8.5.36 || ^9.6.15", "psr/log": "^1.1 || ^2.0 || ^3.0" }, "suggest": { @@ -377,7 +377,7 @@ "ext-intl": "Required for Internationalized Domain Name (IDN) support", "psr/log": "Required for using the Log middleware" }, - "time": "2023-08-27T10:20:53+00:00", + "time": "2023-12-03T20:35:24+00:00", "type": "library", "extra": { "bamarni-bin": { @@ -449,7 +449,7 @@ ], "support": { "issues": "https://github.com/guzzle/guzzle/issues", - "source": "https://github.com/guzzle/guzzle/tree/7.8.0" + "source": "https://github.com/guzzle/guzzle/tree/7.8.1" }, "funding": [ { diff --git a/vendor/composer/installed.php b/vendor/composer/installed.php index 64364b741..dd4263db6 100644 --- a/vendor/composer/installed.php +++ b/vendor/composer/installed.php @@ -3,7 +3,7 @@ 'name' => '__root__', 'pretty_version' => 'dev-master', 'version' => 'dev-master', - 'reference' => '2b8e34453234b8b31ebc9e7020f8677bf3889898', + 'reference' => 'd4ae6c67db8c966ab4998fda6df14072b103106b', 'type' => 'library', 'install_path' => __DIR__ . '/../../', 'aliases' => array(), @@ -13,7 +13,7 @@ '__root__' => array( 'pretty_version' => 'dev-master', 'version' => 'dev-master', - 'reference' => '2b8e34453234b8b31ebc9e7020f8677bf3889898', + 'reference' => 'd4ae6c67db8c966ab4998fda6df14072b103106b', 'type' => 'library', 'install_path' => __DIR__ . '/../../', 'aliases' => array(), @@ -65,9 +65,9 @@ 'dev_requirement' => false, ), 'guzzlehttp/guzzle' => array( - 'pretty_version' => '7.8.0', - 'version' => '7.8.0.0', - 'reference' => '1110f66a6530a40fe7aea0378fe608ee2b2248f9', + 'pretty_version' => '7.8.1', + 'version' => '7.8.1.0', + 'reference' => '41042bc7ab002487b876a0683fc8dce04ddce104', 'type' => 'library', 'install_path' => __DIR__ . '/../guzzlehttp/guzzle', 'aliases' => array(), @@ -371,8 +371,8 @@ 'psr/http-client-implementation' => array( 'dev_requirement' => false, 'provided' => array( - 0 => '1.0', - 1 => '*', + 0 => '*', + 1 => '1.0', ), ), 'psr/http-factory' => array( diff --git a/vendor/guzzlehttp/guzzle/CHANGELOG.md b/vendor/guzzlehttp/guzzle/CHANGELOG.md index 990b86c9e..13709d1b8 100644 --- a/vendor/guzzlehttp/guzzle/CHANGELOG.md +++ b/vendor/guzzlehttp/guzzle/CHANGELOG.md @@ -3,6 +3,14 @@ Please refer to [UPGRADING](UPGRADING.md) guide for upgrading to a major version. +## 7.8.1 - 2023-12-03 + +### Changed + +- Updated links in docs to their canonical versions +- Replaced `call_user_func*` with native calls + + ## 7.8.0 - 2023-08-27 ### Added @@ -643,7 +651,8 @@ object). * Note: This has been changed in 5.0.3 to now encode query string values by default unless the `rawString` argument is provided when setting the query string on a URL: Now allowing many more characters to be present in the - query string without being percent encoded. See https://tools.ietf.org/html/rfc3986#appendix-A + query string without being percent encoded. See + https://datatracker.ietf.org/doc/html/rfc3986#appendix-A ## 5.0.1 - 2014-10-16 @@ -1182,7 +1191,7 @@ interfaces. ## 3.4.0 - 2013-04-11 -* Bug fix: URLs are now resolved correctly based on https://tools.ietf.org/html/rfc3986#section-5.2. #289 +* Bug fix: URLs are now resolved correctly based on https://datatracker.ietf.org/doc/html/rfc3986#section-5.2. #289 * Bug fix: Absolute URLs with a path in a service description will now properly override the base URL. #289 * Bug fix: Parsing a query string with a single PHP array value will now result in an array. #263 * Bug fix: Better normalization of the User-Agent header to prevent duplicate headers. #264. diff --git a/vendor/guzzlehttp/guzzle/README.md b/vendor/guzzlehttp/guzzle/README.md index 0786462b3..6d78a9309 100644 --- a/vendor/guzzlehttp/guzzle/README.md +++ b/vendor/guzzlehttp/guzzle/README.md @@ -3,7 +3,7 @@ # Guzzle, PHP HTTP client [![Latest Version](https://img.shields.io/github/release/guzzle/guzzle.svg?style=flat-square)](https://github.com/guzzle/guzzle/releases) -[![Build Status](https://img.shields.io/github/workflow/status/guzzle/guzzle/CI?label=ci%20build&style=flat-square)](https://github.com/guzzle/guzzle/actions?query=workflow%3ACI) +[![Build Status](https://img.shields.io/github/actions/workflow/status/guzzle/guzzle/ci.yml?label=ci%20build&style=flat-square)](https://github.com/guzzle/guzzle/actions?query=workflow%3ACI) [![Total Downloads](https://img.shields.io/packagist/dt/guzzlehttp/guzzle.svg?style=flat-square)](https://packagist.org/packages/guzzlehttp/guzzle) Guzzle is a PHP HTTP client that makes it easy to send HTTP requests and @@ -66,7 +66,7 @@ composer require guzzlehttp/guzzle | 4.x | EOL | `guzzlehttp/guzzle` | `GuzzleHttp` | [v4][guzzle-4-repo] | N/A | No | >=5.4,<7.0 | | 5.x | EOL | `guzzlehttp/guzzle` | `GuzzleHttp` | [v5][guzzle-5-repo] | [v5][guzzle-5-docs] | No | >=5.4,<7.4 | | 6.x | Security fixes only | `guzzlehttp/guzzle` | `GuzzleHttp` | [v6][guzzle-6-repo] | [v6][guzzle-6-docs] | Yes | >=5.5,<8.0 | -| 7.x | Latest | `guzzlehttp/guzzle` | `GuzzleHttp` | [v7][guzzle-7-repo] | [v7][guzzle-7-docs] | Yes | >=7.2.5,<8.3 | +| 7.x | Latest | `guzzlehttp/guzzle` | `GuzzleHttp` | [v7][guzzle-7-repo] | [v7][guzzle-7-docs] | Yes | >=7.2.5,<8.4 | [guzzle-3-repo]: https://github.com/guzzle/guzzle3 [guzzle-4-repo]: https://github.com/guzzle/guzzle/tree/4.x diff --git a/vendor/guzzlehttp/guzzle/UPGRADING.md b/vendor/guzzlehttp/guzzle/UPGRADING.md index 8fa0afb5d..4efbb5962 100644 --- a/vendor/guzzlehttp/guzzle/UPGRADING.md +++ b/vendor/guzzlehttp/guzzle/UPGRADING.md @@ -189,11 +189,11 @@ $client = new GuzzleHttp\Client(['handler' => $handler]); ## POST Requests -This version added the [`form_params`](http://guzzle.readthedocs.org/en/latest/request-options.html#form_params) +This version added the [`form_params`](https://docs.guzzlephp.org/en/latest/request-options.html#form_params) and `multipart` request options. `form_params` is an associative array of strings or array of strings and is used to serialize an `application/x-www-form-urlencoded` POST request. The -[`multipart`](http://guzzle.readthedocs.org/en/latest/request-options.html#multipart) +[`multipart`](https://docs.guzzlephp.org/en/latest/request-options.html#multipart) option is now used to send a multipart/form-data POST request. `GuzzleHttp\Post\PostFile` has been removed. Use the `multipart` option to add @@ -209,7 +209,7 @@ The `base_url` option has been renamed to `base_uri`. ## Rewritten Adapter Layer -Guzzle now uses [RingPHP](http://ringphp.readthedocs.org/en/latest) to send +Guzzle now uses [RingPHP](https://ringphp.readthedocs.org/en/latest) to send HTTP requests. The `adapter` option in a `GuzzleHttp\Client` constructor is still supported, but it has now been renamed to `handler`. Instead of passing a `GuzzleHttp\Adapter\AdapterInterface`, you must now pass a PHP @@ -575,7 +575,7 @@ You can intercept a request and inject a response using the `intercept()` event of a `GuzzleHttp\Event\BeforeEvent`, `GuzzleHttp\Event\CompleteEvent`, and `GuzzleHttp\Event\ErrorEvent` event. -See: http://docs.guzzlephp.org/en/latest/events.html +See: https://docs.guzzlephp.org/en/latest/events.html ## Inflection @@ -668,9 +668,9 @@ in separate repositories: The service description layer of Guzzle has moved into two separate packages: -- http://github.com/guzzle/command Provides a high level abstraction over web +- https://github.com/guzzle/command Provides a high level abstraction over web services by representing web service operations using commands. -- http://github.com/guzzle/guzzle-services Provides an implementation of +- https://github.com/guzzle/guzzle-services Provides an implementation of guzzle/command that provides request serialization and response parsing using Guzzle service descriptions. @@ -870,7 +870,7 @@ HeaderInterface (e.g. toArray(), getAll(), etc.). 3.3 to 3.4 ---------- -Base URLs of a client now follow the rules of https://tools.ietf.org/html/rfc3986#section-5.2.2 when merging URLs. +Base URLs of a client now follow the rules of https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.2 when merging URLs. 3.2 to 3.3 ---------- diff --git a/vendor/guzzlehttp/guzzle/composer.json b/vendor/guzzlehttp/guzzle/composer.json index 72defd614..69583d7cc 100644 --- a/vendor/guzzlehttp/guzzle/composer.json +++ b/vendor/guzzlehttp/guzzle/composer.json @@ -63,10 +63,10 @@ }, "require-dev": { "ext-curl": "*", - "bamarni/composer-bin-plugin": "^1.8.1", + "bamarni/composer-bin-plugin": "^1.8.2", "php-http/client-integration-tests": "dev-master#2c025848417c1135031fdf9c728ee53d0a7ceaee as 3.0.999", "php-http/message-factory": "^1.1", - "phpunit/phpunit": "^8.5.29 || ^9.5.23", + "phpunit/phpunit": "^8.5.36 || ^9.6.15", "psr/log": "^1.1 || ^2.0 || ^3.0" }, "suggest": { diff --git a/vendor/guzzlehttp/guzzle/src/Cookie/CookieJar.php b/vendor/guzzlehttp/guzzle/src/Cookie/CookieJar.php index fa2b10a8c..c29b4b7e9 100644 --- a/vendor/guzzlehttp/guzzle/src/Cookie/CookieJar.php +++ b/vendor/guzzlehttp/guzzle/src/Cookie/CookieJar.php @@ -243,7 +243,7 @@ class CookieJar implements CookieJarInterface /** * Computes cookie path following RFC 6265 section 5.1.4 * - * @see https://tools.ietf.org/html/rfc6265#section-5.1.4 + * @see https://datatracker.ietf.org/doc/html/rfc6265#section-5.1.4 */ private function getCookiePathFromRequest(RequestInterface $request): string { diff --git a/vendor/guzzlehttp/guzzle/src/Cookie/SetCookie.php b/vendor/guzzlehttp/guzzle/src/Cookie/SetCookie.php index d74915bed..c9806da88 100644 --- a/vendor/guzzlehttp/guzzle/src/Cookie/SetCookie.php +++ b/vendor/guzzlehttp/guzzle/src/Cookie/SetCookie.php @@ -420,7 +420,7 @@ class SetCookie } // Remove the leading '.' as per spec in RFC 6265. - // https://tools.ietf.org/html/rfc6265#section-5.2.3 + // https://datatracker.ietf.org/doc/html/rfc6265#section-5.2.3 $cookieDomain = \ltrim(\strtolower($cookieDomain), '.'); $domain = \strtolower($domain); @@ -431,7 +431,7 @@ class SetCookie } // Matching the subdomain according to RFC 6265. - // https://tools.ietf.org/html/rfc6265#section-5.1.3 + // https://datatracker.ietf.org/doc/html/rfc6265#section-5.1.3 if (\filter_var($domain, \FILTER_VALIDATE_IP)) { return false; } diff --git a/vendor/guzzlehttp/guzzle/src/Handler/CurlFactory.php b/vendor/guzzlehttp/guzzle/src/Handler/CurlFactory.php index be88d9e49..16a942232 100644 --- a/vendor/guzzlehttp/guzzle/src/Handler/CurlFactory.php +++ b/vendor/guzzlehttp/guzzle/src/Handler/CurlFactory.php @@ -256,7 +256,7 @@ class CurlFactory implements CurlFactoryInterface $method = $easy->request->getMethod(); if ($method === 'PUT' || $method === 'POST') { - // See https://tools.ietf.org/html/rfc7230#section-3.3.2 + // See https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.2 if (!$easy->request->hasHeader('Content-Length')) { $conf[\CURLOPT_HTTPHEADER][] = 'Content-Length: 0'; } diff --git a/vendor/guzzlehttp/guzzle/src/RequestOptions.php b/vendor/guzzlehttp/guzzle/src/RequestOptions.php index bf3b02b6b..a38768c0c 100644 --- a/vendor/guzzlehttp/guzzle/src/RequestOptions.php +++ b/vendor/guzzlehttp/guzzle/src/RequestOptions.php @@ -5,9 +5,7 @@ namespace GuzzleHttp; /** * This class contains a list of built-in Guzzle request options. * - * More documentation for each option can be found at http://guzzlephp.org/. - * - * @see http://docs.guzzlephp.org/en/v6/request-options.html + * @see https://docs.guzzlephp.org/en/latest/request-options.html */ final class RequestOptions { diff --git a/vendor/guzzlehttp/guzzle/src/Utils.php b/vendor/guzzlehttp/guzzle/src/Utils.php index fcf571d6b..93d6d39cd 100644 --- a/vendor/guzzlehttp/guzzle/src/Utils.php +++ b/vendor/guzzlehttp/guzzle/src/Utils.php @@ -176,14 +176,13 @@ No system CA bundle could be found in any of the the common system locations. PHP versions earlier than 5.6 are not properly configured to use the system's CA bundle by default. In order to verify peer certificates, you will need to supply the path on disk to a certificate bundle to the 'verify' request -option: http://docs.guzzlephp.org/en/latest/clients.html#verify. If you do not -need a specific certificate bundle, then Mozilla provides a commonly used CA -bundle which can be downloaded here (provided by the maintainer of cURL): -https://curl.haxx.se/ca/cacert.pem. Once -you have a CA bundle available on disk, you can set the 'openssl.cafile' PHP -ini setting to point to the path to the file, allowing you to omit the 'verify' -request option. See https://curl.haxx.se/docs/sslcerts.html for more -information. +option: https://docs.guzzlephp.org/en/latest/request-options.html#verify. If +you do not need a specific certificate bundle, then Mozilla provides a commonly +used CA bundle which can be downloaded here (provided by the maintainer of +cURL): https://curl.haxx.se/ca/cacert.pem. Once you have a CA bundle available +on disk, you can set the 'openssl.cafile' PHP ini setting to point to the path +to the file, allowing you to omit the 'verify' request option. See +https://curl.haxx.se/docs/sslcerts.html for more information. EOT ); } From ff59fbd460aa549ffe959a087f892eadbbe5a2c0 Mon Sep 17 00:00:00 2001 From: wn_ Date: Sat, 23 Dec 2023 15:34:21 +0000 Subject: [PATCH 2/8] Add back 'any auth' retry in UrlHelper::fetch() --- classes/UrlHelper.php | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/classes/UrlHelper.php b/classes/UrlHelper.php index b7583c1e9..e69085052 100644 --- a/classes/UrlHelper.php +++ b/classes/UrlHelper.php @@ -231,7 +231,8 @@ class UrlHelper { // TODO: max_size currently only works for CURL transfers // TODO: multiple-argument way is deprecated, first parameter is a hash now public static function fetch($options /* previously: 0: $url , 1: $type = false, 2: $login = false, 3: $pass = false, - 4: $post_query = false, 5: $timeout = false, 6: $timestamp = 0, 7: $useragent = false, 8: $retry_once_request = false */) { + 4: $post_query = false, 5: $timeout = false, 6: $timestamp = 0, 7: $useragent = false, 8: $retry_once_request = false, + 9: $auth_type = "basic" */) { $span = Tracer::start(__METHOD__); $span->setAttribute('func.args', json_encode(func_get_args())); @@ -246,7 +247,7 @@ class UrlHelper { if (!is_array($options)) { // falling back on compatibility shim - $option_names = [ "url", "type", "login", "pass", "post_query", "timeout", "last_modified", "useragent", "retry-once-request" ]; + $option_names = [ "url", "type", "login", "pass", "post_query", "timeout", "last_modified", "useragent", "retry-once-request", "auth_type" ]; $tmp = []; for ($i = 0; $i < func_num_args(); $i++) { @@ -265,6 +266,7 @@ class UrlHelper { "timestamp" => @func_get_arg(6), "useragent" => @func_get_arg(7), "retry-once-request" => @func_get_arg(8), + "auth_type" => @func_get_arg(9), ); */ } @@ -272,6 +274,7 @@ class UrlHelper { $type = isset($options["type"]) ? $options["type"] : false; $login = isset($options["login"]) ? $options["login"] : false; $pass = isset($options["pass"]) ? $options["pass"] : false; + $auth_type = isset($options["auth_type"]) ? $options["auth_type"] : "basic"; $post_query = isset($options["post_query"]) ? $options["post_query"] : false; $timeout = isset($options["timeout"]) ? $options["timeout"] : false; $last_modified = isset($options["last_modified"]) ? $options["last_modified"] : ""; @@ -312,6 +315,7 @@ class UrlHelper { GuzzleHttp\RequestOptions::HEADERS => [ 'User-Agent' => $useragent ?: Config::get_user_agent(), ], + 'curl' => [], ]; if ($last_modified && !$post_query) @@ -323,8 +327,15 @@ class UrlHelper { if ($http_referrer) $req_options[GuzzleHttp\RequestOptions::HEADERS]['Referer'] = $http_referrer; - if ($login && $pass) + if ($login && $pass && in_array($auth_type, ['basic', 'digest', 'ntlm'])) { + // Let Guzzle handle the details for auth types it supports $req_options[GuzzleHttp\RequestOptions::AUTH] = [$login, $pass]; + } elseif ($auth_type === 'any') { + // https://docs.guzzlephp.org/en/stable/faq.html#how-can-i-add-custom-curl-options + $req_options['curl'][\CURLOPT_HTTPAUTH] = \CURLAUTH_ANY; + if ($login && $pass) + $req_options['curl'][\CURLOPT_USERPWD] = "$login:$pass"; + } if ($post_query) $req_options[GuzzleHttp\RequestOptions::FORM_PARAMS] = $post_query; @@ -359,7 +370,7 @@ class UrlHelper { $response = $client->request($post_query ? 'POST' : 'GET', $url, $req_options); } } catch (\LengthException $ex) { - // 'Content-Length' exceeded the download limit + // Either 'Content-Length' indicated the download limit would be exceeded, or the transfer actually exceeded the download limit. self::$fetch_last_error = (string) $ex; $span->setAttribute('error', self::$fetch_last_error); $span->end(); @@ -372,9 +383,14 @@ class UrlHelper { // 4xx or 5xx self::$fetch_last_error_code = $ex->getResponse()->getStatusCode(); - # TODO: Retry with CURLAUTH_ANY if the response code is 403? Was this actually an issue before? - # https://docs.guzzlephp.org/en/stable/faq.html#how-can-i-add-custom-curl-options - // if (self::$fetch_last_error_code === 403) {} + // If credentials were provided and we got a 403 back, retry once with auth type 'any' + // to attempt compatibility with unusual configurations. + if ($login && $pass && self::$fetch_last_error_code === 403 + && isset($options['auth_type']) && $options['auth_type'] !== 'any') { + $options['auth_type'] = 'any'; + $span->end(); + return self::fetch($options); + } self::$fetch_last_content_type = $ex->getResponse()->getHeaderLine('content-type'); @@ -389,6 +405,7 @@ class UrlHelper { if (($errno === \CURLE_WRITE_ERROR || $errno === \CURLE_BAD_CONTENT_ENCODING) && !array_key_exists('retry-once-request', $options)) { $options['retry-once-request'] = $ex->getRequest()->withHeader('Accept-Encoding', 'none'); + $span->end(); return self::fetch($options); } } From d82da74363adf4d96eb8b3f61f6eee07ee363e3a Mon Sep 17 00:00:00 2001 From: wn_ Date: Sat, 23 Dec 2023 15:48:02 +0000 Subject: [PATCH 3/8] Clean up UrlHelper::resolve_redirects(). Also: this doesn't appear to be used... but maybe in some plugin? --- classes/UrlHelper.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/classes/UrlHelper.php b/classes/UrlHelper.php index e69085052..1bd7bfca1 100644 --- a/classes/UrlHelper.php +++ b/classes/UrlHelper.php @@ -205,15 +205,15 @@ class UrlHelper { $response = $client->request('HEAD', $url, [ GuzzleHttp\RequestOptions::CONNECT_TIMEOUT => $timeout ?: Config::get(Config::FILE_FETCH_CONNECT_TIMEOUT), GuzzleHttp\RequestOptions::TIMEOUT => $timeout ?: Config::get(Config::FILE_FETCH_TIMEOUT), - GuzzleHttp\RequestOptions::ALLOW_REDIRECTS => ['max' => 10, 'track_redirects' => true, 'http_errors' => false], + GuzzleHttp\RequestOptions::ALLOW_REDIRECTS => ['max' => 10, 'track_redirects' => true], + GuzzleHttp\RequestOptions::HTTP_ERRORS => false, GuzzleHttp\RequestOptions::HEADERS => [ 'User-Agent' => Config::get_user_agent(), 'Connection' => 'close', ], ]); - } catch (GuzzleHttp\Exception\GuzzleException $ex) { - // TODO: catch just the "too many redirects" exception, and set a different 'error' for general issues - $span->setAttribute('error', 'too many redirects'); + } catch (Exception $ex) { + $span->setAttribute('error', (string) $ex); $span->end(); return false; } From 9132360d4682cdde0b1bf88c7960137700dec50d Mon Sep 17 00:00:00 2001 From: wn_ Date: Sat, 23 Dec 2023 16:27:12 +0000 Subject: [PATCH 4/8] Rework content encoding error retrying in UrlHelper::fetch() --- classes/UrlHelper.php | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/classes/UrlHelper.php b/classes/UrlHelper.php index 1bd7bfca1..e6b3b0aad 100644 --- a/classes/UrlHelper.php +++ b/classes/UrlHelper.php @@ -231,7 +231,7 @@ class UrlHelper { // TODO: max_size currently only works for CURL transfers // TODO: multiple-argument way is deprecated, first parameter is a hash now public static function fetch($options /* previously: 0: $url , 1: $type = false, 2: $login = false, 3: $pass = false, - 4: $post_query = false, 5: $timeout = false, 6: $timestamp = 0, 7: $useragent = false, 8: $retry_once_request = false, + 4: $post_query = false, 5: $timeout = false, 6: $timestamp = 0, 7: $useragent = false, 8: $encoding = false, 9: $auth_type = "basic" */) { $span = Tracer::start(__METHOD__); $span->setAttribute('func.args', json_encode(func_get_args())); @@ -247,7 +247,7 @@ class UrlHelper { if (!is_array($options)) { // falling back on compatibility shim - $option_names = [ "url", "type", "login", "pass", "post_query", "timeout", "last_modified", "useragent", "retry-once-request", "auth_type" ]; + $option_names = [ "url", "type", "login", "pass", "post_query", "timeout", "last_modified", "useragent", "encoding", "auth_type" ]; $tmp = []; for ($i = 0; $i < func_num_args(); $i++) { @@ -265,7 +265,7 @@ class UrlHelper { "timeout" => @func_get_arg(5), "timestamp" => @func_get_arg(6), "useragent" => @func_get_arg(7), - "retry-once-request" => @func_get_arg(8), + "encoding" => @func_get_arg(8), "auth_type" => @func_get_arg(9), ); */ } @@ -283,6 +283,7 @@ class UrlHelper { $max_size = isset($options["max_size"]) ? $options["max_size"] : Config::get(Config::MAX_DOWNLOAD_FILE_SIZE); // in bytes $http_accept = isset($options["http_accept"]) ? $options["http_accept"] : false; $http_referrer = isset($options["http_referrer"]) ? $options["http_referrer"] : false; + $encoding = isset($options["encoding"]) ? $options["encoding"] : false; $url = ltrim($url, ' '); $url = str_replace(' ', '%20', $url); @@ -324,6 +325,9 @@ class UrlHelper { if ($http_accept) $req_options[GuzzleHttp\RequestOptions::HEADERS]['Accept'] = $http_accept; + if ($encoding) + $req_options[GuzzleHttp\RequestOptions::HEADERS]['Accept-Encoding'] = $encoding; + if ($http_referrer) $req_options[GuzzleHttp\RequestOptions::HEADERS]['Referer'] = $http_referrer; @@ -364,11 +368,7 @@ class UrlHelper { $client = self::get_client(); try { - if (($options['retry-once-request'] ?? null) instanceof Psr\Http\Message\RequestInterface) { - $response = $client->send($options['retry-once-request']); - } else { - $response = $client->request($post_query ? 'POST' : 'GET', $url, $req_options); - } + $response = $client->request($post_query ? 'POST' : 'GET', $url, $req_options); } catch (\LengthException $ex) { // Either 'Content-Length' indicated the download limit would be exceeded, or the transfer actually exceeded the download limit. self::$fetch_last_error = (string) $ex; @@ -403,8 +403,8 @@ class UrlHelper { // responses with `Content-Encoding` is automatically attempted. If this fails, we do a // single retry with `Accept-Encoding: none` to try and force an unencoded response. if (($errno === \CURLE_WRITE_ERROR || $errno === \CURLE_BAD_CONTENT_ENCODING) && - !array_key_exists('retry-once-request', $options)) { - $options['retry-once-request'] = $ex->getRequest()->withHeader('Accept-Encoding', 'none'); + $ex->getRequest()->getHeaderLine('accept-encoding') !== 'none') { + $options['encoding'] = 'none'; $span->end(); return self::fetch($options); } From e33b0297d59a47fc5819ce79bdf71d29ad5e88b5 Mon Sep 17 00:00:00 2001 From: wn_ Date: Sat, 23 Dec 2023 16:59:29 +0000 Subject: [PATCH 5/8] Ensure the feed name is easily visible when looking at the feeds with errors list. --- js/CommonDialogs.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/CommonDialogs.js b/js/CommonDialogs.js index 8d4241ef8..989a61539 100644 --- a/js/CommonDialogs.js +++ b/js/CommonDialogs.js @@ -268,7 +268,7 @@ const CommonDialogs = { ${reply.map((row) => ` - +
From 3c171cc92c2c62167b338b9eb79bb1ff973fd356 Mon Sep 17 00:00:00 2001 From: wn_ Date: Sat, 23 Dec 2023 19:52:56 +0000 Subject: [PATCH 6/8] Add some tests for UrlHelper::fetch() --- classes/UrlHelper.php | 5 ++- tests/UrlHelperTest.php | 68 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 66 insertions(+), 7 deletions(-) diff --git a/classes/UrlHelper.php b/classes/UrlHelper.php index e6b3b0aad..d088a355b 100644 --- a/classes/UrlHelper.php +++ b/classes/UrlHelper.php @@ -18,7 +18,7 @@ class UrlHelper { static string $fetch_effective_url; static string $fetch_effective_ip_addr; - private static ?GuzzleHttp\ClientInterface $client = null; + public static ?GuzzleHttp\ClientInterface $client = null; private static function get_client(): GuzzleHttp\ClientInterface { if (self::$client == null) { @@ -385,8 +385,7 @@ class UrlHelper { // If credentials were provided and we got a 403 back, retry once with auth type 'any' // to attempt compatibility with unusual configurations. - if ($login && $pass && self::$fetch_last_error_code === 403 - && isset($options['auth_type']) && $options['auth_type'] !== 'any') { + if ($login && $pass && self::$fetch_last_error_code === 403 && $auth_type !== 'any') { $options['auth_type'] = 'any'; $span->end(); return self::fetch($options); diff --git a/tests/UrlHelperTest.php b/tests/UrlHelperTest.php index fe4eb5db2..2170ed50b 100644 --- a/tests/UrlHelperTest.php +++ b/tests/UrlHelperTest.php @@ -1,5 +1,11 @@ assertEquals( 'magnet:?xt=urn:btih:...', - UrlHelper::rewrite_relative('http://example.com/example/', + UrlHelper::rewrite_relative( + 'http://example.com/example/', 'magnet:?xt=urn:btih:...', - "a", "href", "") + "a", + "href", + "" + ) ); // disallowed magnet $this->assertEquals( 'http://example.com?xt=urn:btih:...', - UrlHelper::rewrite_relative('http://example.com/example/', - 'magnet:?xt=urn:btih:...') + UrlHelper::rewrite_relative( + 'http://example.com/example/', + 'magnet:?xt=urn:btih:...' + ) ); $this->assertEquals( @@ -49,6 +61,54 @@ final class UrlHelperTest extends TestCase { 'http://www.example.com/test', UrlHelper::rewrite_relative('http://www.example.com/test2 ', 'http://www.example.com/test') ); + } + public function test_fetch(): void { + $mock = new MockHandler(); + + UrlHelper::$client = new Client([ + 'handler' => HandlerStack::create($mock), + ]); + + $mock->append(new Response(200, [], 'Hello, World')); + $result = UrlHelper::fetch('https://www.example.com'); + $this->assertEquals(200, UrlHelper::$fetch_last_error_code); + $this->assertEquals('Hello, World', $result); + + foreach (['ftp://ftp.example.com', 'http://127.0.0.1', 'blah', '', 42, null] as $url) { + $result = UrlHelper::fetch($url); + $this->assertFalse($result); + } + + $mock->append(new Response(200, ['Content-Length' => PHP_INT_MAX])); + $result = UrlHelper::fetch('https://www.example.com/very-large-content-length'); + $this->assertFalse($result); + + $mock->append(new Response(301, ['Location' => 'https://www.example.com'])); + $result = UrlHelper::fetch(['url' => 'https://example.com', 'followlocation' => false]); + $this->assertFalse($result); + + $mock->append( + new Response(301, ['Location' => 'http://127.0.0.1']), + new Response(200, [], 'Hello, World'), + ); + $result = UrlHelper::fetch(['url' => 'https://example.com', 'followlocation' => true]); + $this->assertFalse($result); + $this->assertEquals('URL received after redirection failed extended validation.', UrlHelper::$fetch_last_error); + $this->assertEquals('http://127.0.0.1', UrlHelper::$fetch_effective_url); + + $mock->append(new Response(200, [], '')); + $result = UrlHelper::fetch('https://www.example.com'); + $this->assertFalse($result); + $this->assertEquals('Successful response, but no content was received.', UrlHelper::$fetch_last_error); + + // Currently failing with `Error: Undefined constant "CURLOPT_HTTPAUTH"`. + // $mock->append( + // new Response(403, []), + // new Response(200, [], 'Hello, World'), + // ); + // $result = UrlHelper::fetch(['url' => 'https://example.com/requires-credentials', 'login' => 'some_username', 'pass' => 'some_password']); + // $this->assertEquals(200, UrlHelper::$fetch_last_error_code); + // $this->assertEquals('Hello, World', $result); } } From 9a1f7c2ebfe4470440097c403cec2da011f22d02 Mon Sep 17 00:00:00 2001 From: wn_ Date: Sat, 23 Dec 2023 19:58:39 +0000 Subject: [PATCH 7/8] Appease PHPStan in UrlHelperTest --- tests/UrlHelperTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/UrlHelperTest.php b/tests/UrlHelperTest.php index 2170ed50b..30a0201c1 100644 --- a/tests/UrlHelperTest.php +++ b/tests/UrlHelperTest.php @@ -80,7 +80,7 @@ final class UrlHelperTest extends TestCase { $this->assertFalse($result); } - $mock->append(new Response(200, ['Content-Length' => PHP_INT_MAX])); + $mock->append(new Response(200, ['Content-Length' => (string) PHP_INT_MAX])); $result = UrlHelper::fetch('https://www.example.com/very-large-content-length'); $this->assertFalse($result); From 0ea9db317038f5510a1ca875b55af770997ec148 Mon Sep 17 00:00:00 2001 From: wn_ Date: Sun, 24 Dec 2023 11:21:43 +0000 Subject: [PATCH 8/8] Fix specifying auth type in UrlHelper::fetch(), add a test for 403 auth retry. --- classes/UrlHelper.php | 2 +- tests/UrlHelperTest.php | 22 ++++++++++++++-------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/classes/UrlHelper.php b/classes/UrlHelper.php index d088a355b..03202cff8 100644 --- a/classes/UrlHelper.php +++ b/classes/UrlHelper.php @@ -333,7 +333,7 @@ class UrlHelper { if ($login && $pass && in_array($auth_type, ['basic', 'digest', 'ntlm'])) { // Let Guzzle handle the details for auth types it supports - $req_options[GuzzleHttp\RequestOptions::AUTH] = [$login, $pass]; + $req_options[GuzzleHttp\RequestOptions::AUTH] = [$login, $pass, $auth_type]; } elseif ($auth_type === 'any') { // https://docs.guzzlephp.org/en/stable/faq.html#how-can-i-add-custom-curl-options $req_options['curl'][\CURLOPT_HTTPAUTH] = \CURLAUTH_ANY; diff --git a/tests/UrlHelperTest.php b/tests/UrlHelperTest.php index 30a0201c1..58960add0 100644 --- a/tests/UrlHelperTest.php +++ b/tests/UrlHelperTest.php @@ -102,13 +102,19 @@ final class UrlHelperTest extends TestCase { $this->assertFalse($result); $this->assertEquals('Successful response, but no content was received.', UrlHelper::$fetch_last_error); - // Currently failing with `Error: Undefined constant "CURLOPT_HTTPAUTH"`. - // $mock->append( - // new Response(403, []), - // new Response(200, [], 'Hello, World'), - // ); - // $result = UrlHelper::fetch(['url' => 'https://example.com/requires-credentials', 'login' => 'some_username', 'pass' => 'some_password']); - // $this->assertEquals(200, UrlHelper::$fetch_last_error_code); - // $this->assertEquals('Hello, World', $result); + // Fake a 403 for basic auth and success with `CURLAUTH_ANY` in the retry attempt + $mock->append( + new Response(403, []), + new Response(200, [], 'Hello, World'), + ); + $result = UrlHelper::fetch([ + 'url' => 'https://example.com/requires-credentials', + 'login' => 'some_username', + 'pass' => 'some_password', + 'auth_type' => 'basic', + ]); + $this->assertEquals(200, UrlHelper::$fetch_last_error_code); + $this->assertEquals('Hello, World', $result); + $this->assertEquals($mock->getLastOptions()['curl'][\CURLOPT_HTTPAUTH], \CURLAUTH_ANY); } }