[core] Change how `Cookie` headers are handled

Cookies are now saved and loaded under `cookies` key in the info dict
instead of `http_headers.Cookie`. Cookies passed in headers are
auto-scoped to the input URLs with a warning.

Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

Authored by: Grub4K
This commit is contained in:
Simon Sawicki 2023-07-06 21:51:04 +05:30 committed by pukkandan
parent f8b4bcc0a7
commit 3121512228
No known key found for this signature in database
GPG Key ID: 7EEE9E1E817D0A39
3 changed files with 139 additions and 4 deletions

View File

@ -1213,6 +1213,62 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(downloaded['extractor'], 'Video') self.assertEqual(downloaded['extractor'], 'Video')
self.assertEqual(downloaded['extractor_key'], 'Video') self.assertEqual(downloaded['extractor_key'], 'Video')
def test_header_cookies(self):
from http.cookiejar import Cookie
ydl = FakeYDL()
ydl.report_warning = lambda *_, **__: None
def cookie(name, value, version=None, domain='', path='', secure=False, expires=None):
return Cookie(
version or 0, name, value, None, False,
domain, bool(domain), bool(domain), path, bool(path),
secure, expires, False, None, None, rest={})
_test_url = 'https://yt.dlp/test'
def test(encoded_cookies, cookies, headers=False, round_trip=None, error=None):
def _test():
ydl.cookiejar.clear()
ydl._load_cookies(encoded_cookies, from_headers=headers)
if headers:
ydl._apply_header_cookies(_test_url)
data = {'url': _test_url}
ydl._calc_headers(data)
self.assertCountEqual(
map(vars, ydl.cookiejar), map(vars, cookies),
'Extracted cookiejar.Cookie is not the same')
if not headers:
self.assertEqual(
data.get('cookies'), round_trip or encoded_cookies,
'Cookie is not the same as round trip')
ydl.__dict__['_YoutubeDL__header_cookies'] = []
with self.subTest(msg=encoded_cookies):
if not error:
_test()
return
with self.assertRaisesRegex(Exception, error):
_test()
test('test=value; Domain=.yt.dlp', [cookie('test', 'value', domain='.yt.dlp')])
test('test=value', [cookie('test', 'value')], error='Unscoped cookies are not allowed')
test('cookie1=value1; Domain=.yt.dlp; Path=/test; cookie2=value2; Domain=.yt.dlp; Path=/', [
cookie('cookie1', 'value1', domain='.yt.dlp', path='/test'),
cookie('cookie2', 'value2', domain='.yt.dlp', path='/')])
test('test=value; Domain=.yt.dlp; Path=/test; Secure; Expires=9999999999', [
cookie('test', 'value', domain='.yt.dlp', path='/test', secure=True, expires=9999999999)])
test('test="value; "; path=/test; domain=.yt.dlp', [
cookie('test', 'value; ', domain='.yt.dlp', path='/test')],
round_trip='test="value\\073 "; Domain=.yt.dlp; Path=/test')
test('name=; Domain=.yt.dlp', [cookie('name', '', domain='.yt.dlp')],
round_trip='name=""; Domain=.yt.dlp')
test('test=value', [cookie('test', 'value', domain='.yt.dlp')], headers=True)
test('cookie1=value; Domain=.yt.dlp; cookie2=value', [], headers=True, error='Invalid syntax')
ydl.deprecated_feature = ydl.report_error
test('test=value', [], headers=True, error='Passing cookies as a header is a potential security risk')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -1,9 +1,11 @@
import collections import collections
import contextlib import contextlib
import copy
import datetime import datetime
import errno import errno
import fileinput import fileinput
import functools import functools
import http.cookiejar
import io import io
import itertools import itertools
import json import json
@ -25,7 +27,7 @@ import unicodedata
from .cache import Cache from .cache import Cache
from .compat import urllib # isort: split from .compat import urllib # isort: split
from .compat import compat_os_name, compat_shlex_quote from .compat import compat_os_name, compat_shlex_quote
from .cookies import load_cookies from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor from .extractor import gen_extractor_classes, get_info_extractor
@ -673,6 +675,9 @@ class YoutubeDL:
if auto_init and auto_init != 'no_verbose_header': if auto_init and auto_init != 'no_verbose_header':
self.print_debug_header() self.print_debug_header()
self.__header_cookies = []
self._load_cookies(traverse_obj(self.params.get('http_headers'), 'cookie', casesense=False)) # compat
def check_deprecated(param, option, suggestion): def check_deprecated(param, option, suggestion):
if self.params.get(param) is not None: if self.params.get(param) is not None:
self.report_warning(f'{option} is deprecated. Use {suggestion} instead') self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
@ -1625,8 +1630,60 @@ class YoutubeDL:
self.to_screen('') self.to_screen('')
raise raise
def _load_cookies(self, data, *, from_headers=True):
"""Loads cookies from a `Cookie` header
This tries to work around the security vulnerability of passing cookies to every domain.
See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
The unscoped cookies are saved for later to be stored in the jar with a limited scope.
@param data The Cookie header as string to load the cookies from
@param from_headers If `False`, allows Set-Cookie syntax in the cookie string (at least a domain will be required)
"""
for cookie in LenientSimpleCookie(data).values():
if from_headers and any(cookie.values()):
raise ValueError('Invalid syntax in Cookie Header')
domain = cookie.get('domain') or ''
expiry = cookie.get('expires')
if expiry == '': # 0 is valid
expiry = None
prepared_cookie = http.cookiejar.Cookie(
cookie.get('version') or 0, cookie.key, cookie.value, None, False,
domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
cookie.get('secure') or False, expiry, False, None, None, {})
if domain:
self.cookiejar.set_cookie(prepared_cookie)
elif from_headers:
self.deprecated_feature(
'Passing cookies as a header is a potential security risk; '
'they will be scoped to the domain of the downloaded urls. '
'Please consider loading cookies from a file or browser instead.')
self.__header_cookies.append(prepared_cookie)
else:
self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
tb=False, is_error=False)
def _apply_header_cookies(self, url):
"""Applies stray header cookies to the provided url
This loads header cookies and scopes them to the domain provided in `url`.
While this is not ideal, it helps reduce the risk of them being sent
to an unintended destination while mostly maintaining compatibility.
"""
parsed = urllib.parse.urlparse(url)
if not parsed.hostname:
return
for cookie in map(copy.copy, self.__header_cookies):
cookie.domain = f'.{parsed.hostname}'
self.cookiejar.set_cookie(cookie)
@_handle_extraction_exceptions @_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process): def __extract_info(self, url, ie, download, extra_info, process):
self._apply_header_cookies(url)
try: try:
ie_result = ie.extract(url) ie_result = ie.extract(url)
except UserNotLive as e: except UserNotLive as e:
@ -2414,9 +2471,24 @@ class YoutubeDL:
if 'Youtubedl-No-Compression' in res: # deprecated if 'Youtubedl-No-Compression' in res: # deprecated
res.pop('Youtubedl-No-Compression', None) res.pop('Youtubedl-No-Compression', None)
res['Accept-Encoding'] = 'identity' res['Accept-Encoding'] = 'identity'
cookies = self.cookiejar.get_cookie_header(info_dict['url']) cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
if cookies: if cookies:
res['Cookie'] = cookies encoder = LenientSimpleCookie()
values = []
for cookie in cookies:
_, value = encoder.value_encode(cookie.value)
values.append(f'{cookie.name}={value}')
if cookie.domain:
values.append(f'Domain={cookie.domain}')
if cookie.path:
values.append(f'Path={cookie.path}')
if cookie.secure:
values.append('Secure')
if cookie.expires:
values.append(f'Expires={cookie.expires}')
if cookie.version:
values.append(f'Version={cookie.version}')
info_dict['cookies'] = '; '.join(values)
if 'X-Forwarded-For' not in res: if 'X-Forwarded-For' not in res:
x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip') x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
@ -3423,6 +3495,8 @@ class YoutubeDL:
infos = [self.sanitize_info(info, self.params.get('clean_infojson', True)) infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
for info in variadic(json.loads('\n'.join(f)))] for info in variadic(json.loads('\n'.join(f)))]
for info in infos: for info in infos:
self._load_cookies(info.get('cookies'), from_headers=False)
self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False)) # compat
try: try:
self.__download_wrapper(self.process_ie_result)(info, download=True) self.__download_wrapper(self.process_ie_result)(info, download=True)
except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:

View File

@ -32,6 +32,7 @@ from ..utils import (
timetuple_from_msec, timetuple_from_msec,
try_call, try_call,
) )
from ..utils.traversal import traverse_obj
class FileDownloader: class FileDownloader:
@ -419,7 +420,6 @@ class FileDownloader:
"""Download to a filename using the info from info_dict """Download to a filename using the info from info_dict
Return True on success and False otherwise Return True on success and False otherwise
""" """
nooverwrites_and_exists = ( nooverwrites_and_exists = (
not self.params.get('overwrites', True) not self.params.get('overwrites', True)
and os.path.exists(encodeFilename(filename)) and os.path.exists(encodeFilename(filename))
@ -453,6 +453,11 @@ class FileDownloader:
self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...') self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
time.sleep(sleep_interval) time.sleep(sleep_interval)
# Filter the `Cookie` header from the info_dict to prevent leaks.
# See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
info_dict['http_headers'] = dict(traverse_obj(info_dict, (
'http_headers', {dict.items}, lambda _, pair: pair[0].lower() != 'cookie'))) or None
ret = self.real_download(filename, info_dict) ret = self.real_download(filename, info_dict)
self._finish_multiline_status() self._finish_multiline_status()
return ret, True return ret, True