From fca6dba8b80286ae6d3ca0a60c4799c220a52650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 29 Feb 2020 19:08:44 +0700 Subject: [PATCH] [YoutubeDL] Force redirect URL to unicode on python 2 --- youtube_dl/YoutubeDL.py | 4 +++- youtube_dl/utils.py | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b09cb0a79..19370f62b 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -92,6 +92,7 @@ YoutubeDLCookieJar, YoutubeDLCookieProcessor, YoutubeDLHandler, + YoutubeDLRedirectHandler, ) from .cache import Cache from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER @@ -2343,6 +2344,7 @@ def _setup_opener(self): debuglevel = 1 if self.params.get('debug_printtraffic') else 0 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) + redirect_handler = YoutubeDLRedirectHandler() data_handler = compat_urllib_request_DataHandler() # When passing our own FileHandler instance, build_opener won't add the @@ -2356,7 +2358,7 @@ def file_open(*args, **kwargs): file_handler.file_open = file_open opener = compat_urllib_request.build_opener( - proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler) + proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f6204692a..8ccf25489 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2795,6 +2795,15 @@ def http_response(self, request, response): https_response = http_response +class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): + if sys.version_info[0] < 3: + def redirect_request(self, req, fp, code, msg, headers, newurl): + # On python 2 urlh.geturl() may sometimes return redirect URL + # as byte string instead of unicode. This workaround allows + # to force it always return unicode. + return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl)) + + def extract_timezone(date_str): m = re.search( r'^.{8,}?(?PZ$| ?(?P\+|-)(?P[0-9]{2}):?(?P[0-9]{2})$)',