From 91a91dac15907bbd9bd773da9e5c6f0b7ff19f5a Mon Sep 17 00:00:00 2001 From: wn_ Date: Fri, 29 Dec 2023 00:31:03 +0000 Subject: [PATCH] Perform validation of redirect URLs during the redirect process. Previously, validation was only done after all redirects and the final request had completed. This approach ensures all redirects are to URLs that pass extended validation. --- classes/UrlHelper.php | 25 +++++++++++++++++++++++-- tests/UrlHelperTest.php | 13 ++++++++----- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/classes/UrlHelper.php b/classes/UrlHelper.php index 03202cff8..82b8bf36e 100644 --- a/classes/UrlHelper.php +++ b/classes/UrlHelper.php @@ -1,4 +1,9 @@ $timeout ?: Config::get(Config::FILE_FETCH_CONNECT_TIMEOUT), GuzzleHttp\RequestOptions::TIMEOUT => $timeout ?: Config::get(Config::FILE_FETCH_TIMEOUT), - GuzzleHttp\RequestOptions::ALLOW_REDIRECTS => $followlocation ? ['max' => 20, 'track_redirects' => true] : false, GuzzleHttp\RequestOptions::HEADERS => [ 'User-Agent' => $useragent ?: Config::get_user_agent(), ], 'curl' => [], ]; + if ($followlocation) { + $req_options[GuzzleHttp\RequestOptions::ALLOW_REDIRECTS] = [ + 'max' => 20, + 'track_redirects' => true, + 'on_redirect' => function(RequestInterface $request, ResponseInterface $response, UriInterface $uri) { + if (!self::validate($uri, true)) { + self::$fetch_effective_url = (string) $uri; + throw new GuzzleHttp\Exception\RequestException('URL received during redirection failed extended validation.', + $request, $response); + } + }, + ]; + } else { + $req_options[GuzzleHttp\RequestOptions::ALLOW_REDIRECTS] = false; + } + if ($last_modified && !$post_query) $req_options[GuzzleHttp\RequestOptions::HEADERS]['If-Modified-Since'] = $last_modified; @@ -355,7 +375,7 @@ class UrlHelper { }; # Alternative/supplement to `progress` checking - $req_options[GuzzleHttp\RequestOptions::ON_HEADERS] = function(Psr\Http\Message\ResponseInterface $response) use(&$max_size, $url) { + $req_options[GuzzleHttp\RequestOptions::ON_HEADERS] = function(ResponseInterface $response) use(&$max_size, $url) { $content_length = $response->getHeaderLine('Content-Length'); if ($content_length > $max_size) { Debug::log("[UrlHelper] fetch error: server indicated (via 'Content-Length: {$content_length}') max size of $max_size bytes " . @@ -426,6 +446,7 @@ class UrlHelper { $history_header = $response->getHeader(GuzzleHttp\RedirectMiddleware::HISTORY_HEADER); self::$fetch_effective_url = $history_header ? end($history_header) : $url; + // This shouldn't be necessary given the checks that occur during potential redirects, but we'll do it anyway. if (!self::validate(self::$fetch_effective_url, true)) { self::$fetch_last_error = "URL received after redirection failed extended validation."; $span->setAttribute('error', self::$fetch_last_error); diff --git a/tests/UrlHelperTest.php b/tests/UrlHelperTest.php index 58960add0..825c71e8e 100644 --- a/tests/UrlHelperTest.php +++ b/tests/UrlHelperTest.php @@ -74,6 +74,7 @@ final class UrlHelperTest extends TestCase { $result = UrlHelper::fetch('https://www.example.com'); $this->assertEquals(200, UrlHelper::$fetch_last_error_code); $this->assertEquals('Hello, World', $result); + $mock->reset(); foreach (['ftp://ftp.example.com', 'http://127.0.0.1', 'blah', '', 42, null] as $url) { $result = UrlHelper::fetch($url); @@ -83,24 +84,25 @@ final class UrlHelperTest extends TestCase { $mock->append(new Response(200, ['Content-Length' => (string) PHP_INT_MAX])); $result = UrlHelper::fetch('https://www.example.com/very-large-content-length'); $this->assertFalse($result); + $mock->reset(); $mock->append(new Response(301, ['Location' => 'https://www.example.com'])); $result = UrlHelper::fetch(['url' => 'https://example.com', 'followlocation' => false]); $this->assertFalse($result); + $mock->reset(); - $mock->append( - new Response(301, ['Location' => 'http://127.0.0.1']), - new Response(200, [], 'Hello, World'), - ); + $mock->append(new Response(301, ['Location' => 'http://127.0.0.1'])); $result = UrlHelper::fetch(['url' => 'https://example.com', 'followlocation' => true]); $this->assertFalse($result); - $this->assertEquals('URL received after redirection failed extended validation.', UrlHelper::$fetch_last_error); + $this->assertMatchesRegularExpression('%failed extended validation%', UrlHelper::$fetch_last_error); $this->assertEquals('http://127.0.0.1', UrlHelper::$fetch_effective_url); + $mock->reset(); $mock->append(new Response(200, [], '')); $result = UrlHelper::fetch('https://www.example.com'); $this->assertFalse($result); $this->assertEquals('Successful response, but no content was received.', UrlHelper::$fetch_last_error); + $mock->reset(); // Fake a 403 for basic auth and success with `CURLAUTH_ANY` in the retry attempt $mock->append( @@ -116,5 +118,6 @@ final class UrlHelperTest extends TestCase { $this->assertEquals(200, UrlHelper::$fetch_last_error_code); $this->assertEquals('Hello, World', $result); $this->assertEquals($mock->getLastOptions()['curl'][\CURLOPT_HTTPAUTH], \CURLAUTH_ANY); + $mock->reset(); } }