From 54c2521ca656f1a0897d21cbdf34cbad4e51735b Mon Sep 17 00:00:00 2001 From: David Skrundz Date: Thu, 9 Dec 2021 04:47:56 -0700 Subject: [PATCH] [CBC Gem] Extract 1080p formats (#1913) Authored by: DavidSkrundz --- yt_dlp/extractor/cbc.py | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index 413053499..392c77884 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -11,11 +11,13 @@ compat_str, ) from ..utils import ( + int_or_none, + join_nonempty, js_to_json, - smuggle_url, - try_get, orderedSet, + smuggle_url, strip_or_none, + try_get, ExtractorError, ) @@ -313,6 +315,37 @@ def _real_initialize(self): return self._claims_token = self._downloader.cache.load(self._NETRC_MACHINE, 'claims_token') + def _find_secret_formats(self, formats, video_id): + """ Find a valid video url and convert it to the secret variant """ + base_format = next((f for f in formats if f.get('vcodec') != 'none'), None) + if not base_format: + return + + base_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', base_format['url']) + url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url) + + secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False) + if not secret_xml: + return + + for child in secret_xml: + if child.attrib.get('Type') != 'video': + continue + for video_quality in child: + bitrate = int_or_none(video_quality.attrib.get('Bitrate')) + if not bitrate or 'Index' not in video_quality.attrib: + continue + height = int_or_none(video_quality.attrib.get('MaxHeight')) + + yield { + **base_format, + 'format_id': join_nonempty('sec', height), + 'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url), + 'width': int_or_none(video_quality.attrib.get('MaxWidth')), + 'tbr': bitrate / 1000.0, + 'height': height, + } + def _real_extract(self, url): video_id = self._match_id(url) video_info = 
self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/assets/' + video_id, video_id) @@ -335,6 +368,7 @@ def _real_extract(self, url): formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls') self._remove_duplicate_formats(formats) + formats.extend(self._find_secret_formats(formats, video_id)) for format in formats: if format.get('vcodec') == 'none':