From ddd24c99493483bde822944e8063064f53464ac1 Mon Sep 17 00:00:00 2001 From: The Hatsune Daishi Date: Wed, 8 Dec 2021 02:03:48 +0900 Subject: [PATCH] [ntvcojp] Extract NUXT data (#1915) Fixes: https://github.com/ytdl-org/youtube-dl/issues/30309 Authored by: nao20010128nao --- yt_dlp/extractor/ntvcojp.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/ntvcojp.py b/yt_dlp/extractor/ntvcojp.py index 0c8221b22..c9af91188 100644 --- a/yt_dlp/extractor/ntvcojp.py +++ b/yt_dlp/extractor/ntvcojp.py @@ -3,8 +3,9 @@ from .common import InfoExtractor from ..utils import ( - js_to_json, + ExtractorError, smuggle_url, + traverse_obj, ) @@ -19,7 +20,7 @@ class NTVCoJpCUIE(InfoExtractor): 'ext': 'mp4', 'title': '桜エビと炒り卵がポイント! 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸', 'upload_date': '20181213', - 'description': 'md5:211b52f4fd60f3e0e72b68b0c6ba52a9', + 'description': 'md5:1985b51a9abc285df0104d982a325f2a', 'uploader_id': '3855502814001', 'timestamp': 1544669941, }, @@ -28,22 +29,30 @@ class NTVCoJpCUIE(InfoExtractor): 'skip_download': True, }, } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - player_config = self._parse_json(self._search_regex( - r'(?s)PLAYER_CONFIG\s*=\s*({.+?})', - webpage, 'player config'), display_id, js_to_json) - video_id = player_config['videoId'] - account_id = player_config.get('account') or '3855502814001' + player_config = self._search_nuxt_data(webpage, display_id) + video_id = traverse_obj(player_config, ('movie', 'video_id')) + if not video_id: + raise ExtractorError('Failed to extract video ID for Brightcove') + account_id = traverse_obj(player_config, ('player', 'account')) or '3855502814001' + title = traverse_obj(player_config, ('movie', 'name')) + if not title: + og_title = self._og_search_title(webpage, fatal=False) or 
traverse_obj(player_config, ('player', 'title')) + if og_title: + title = og_title.split('(', 1)[0].strip() + description = (traverse_obj(player_config, ('movie', 'description')) + or self._html_search_meta(['description', 'og:description'], webpage)) return { '_type': 'url_transparent', 'id': video_id, 'display_id': display_id, - 'title': self._search_regex(r']+class="title"[^>]*>([^<]+)', webpage, 'title').strip(), - 'description': self._html_search_meta(['description', 'og:description'], webpage), + 'title': title, + 'description': description, 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), {'geo_countries': ['JP']}), 'ie_key': 'BrightcoveNew', }