diff --git a/include/functions.php b/include/functions.php index d58f03ab1..750707626 100644 --- a/include/functions.php +++ b/include/functions.php @@ -63,6 +63,11 @@ define_default('MAX_CONDITIONAL_INTERVAL', 3600*12); // max interval between forced unconditional updates for servers // not complying with http if-modified-since (seconds) + define_default('MAX_FETCH_REQUESTS_PER_HOST', 25); + // a maximum amount of allowed HTTP requests per destination host + // during a single update (i.e. within PHP process lifetime) + // this is used to not cause excessive load on the origin server on + // e.g. feed subscription when all articles are being processes /* tunables end here */ @@ -171,6 +176,7 @@ global $fetch_last_modified; global $fetch_effective_url; global $fetch_curl_used; + global $fetch_domain_hits; $fetch_last_error = false; $fetch_last_error_code = -1; @@ -180,6 +186,9 @@ $fetch_last_modified = ""; $fetch_effective_url = ""; + if (!is_array($fetch_domain_hits)) + $fetch_domain_hits = []; + if (!is_array($options)) { // falling back on compatibility shim @@ -222,6 +231,14 @@ if (strpos($url, "//") === 0) $url = 'http:' . $url; + $url_host = parse_url($url, PHP_URL_HOST); + $fetch_domain_hits[$url_host] += 1; + + if ($fetch_domain_hits[$url_host] > MAX_FETCH_REQUESTS_PER_HOST) { + user_error("Exceeded fetch request quota for $url_host: " . $fetch_domain_hits[$url_host], E_USER_WARNING); + #return false; + } + if (!defined('NO_CURL') && function_exists('curl_init') && !ini_get("open_basedir")) { $fetch_curl_used = true;