From 468f104ce7d8da25ba34a1cc860b57de09aea651 Mon Sep 17 00:00:00 2001
From: m4tu4g <71326926+m4tu4g@users.noreply.github.com>
Date: Tue, 3 May 2022 03:06:37 +0530
Subject: [PATCH] [masters] Add extractor (#3358)

Closes #3240
Authored by: m4tu4g
---
Note: MastersIE._VALID_URL must define the named groups `date` and `id`,
because _real_extract reads both with .group('id', 'date'). A bare `(?P`
with no `<name>` is rejected by Python's `re` module. The extractor
downloads a JSON metadata document for the id, builds HLS formats from its
media.m3u8 entry, and turns the first entry of `images` (iterated as a
name -> URL mapping) into the thumbnail list.

 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/masters.py    | 39 ++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)
 create mode 100644 yt_dlp/extractor/masters.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 1d4962bbe..a3da85a0f 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -849,6 +849,7 @@ from .markiza import (
     MarkizaPageIE,
 )
 from .massengeschmacktv import MassengeschmackTVIE
+from .masters import MastersIE
 from .matchtv import MatchTVIE
 from .mdr import MDRIE
 from .medaltv import MedalTVIE
diff --git a/yt_dlp/extractor/masters.py b/yt_dlp/extractor/masters.py
new file mode 100644
index 000000000..d1ce07f10
--- /dev/null
+++ b/yt_dlp/extractor/masters.py
@@ -0,0 +1,39 @@
+from __future__ import unicode_literals
+from .common import InfoExtractor
+from ..utils import (
+    traverse_obj,
+    unified_strdate,
+)
+
+
+class MastersIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?masters\.com/en_US/watch/(?P<date>\d{4}-\d{2}-\d{2})/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.masters.com/en_US/watch/2022-04-07/16493755593805191/sungjae_im_thursday_interview_2022.html',
+        'info_dict': {
+            'id': '16493755593805191',
+            'ext': 'mp4',
+            'title': 'Sungjae Im: Thursday Interview 2022',
+            'upload_date': '20220407',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id, upload_date = self._match_valid_url(url).group('id', 'date')
+        content_resp = self._download_json(
+            f'https://www.masters.com/relatedcontent/rest/v2/masters_v1/en/content/masters_v1_{video_id}_en',
+            video_id)
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(traverse_obj(content_resp, ('media', 'm3u8')), video_id, 'mp4')
+        self._sort_formats(formats)
+
+        thumbnails = [{'id': name, 'url': url} for name, url in traverse_obj(content_resp, ('images', 0), default={}).items()]
+
+        return {
+            'id': video_id,
+            'title': content_resp.get('title'),
+            'formats': formats,
+            'subtitles': subtitles,
+            'upload_date': unified_strdate(upload_date),
+            'thumbnails': thumbnails,
+        }