From 2cdbc06a1f9dfda4079b2529aa51089650715466 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 18:32:45 +0800 Subject: [PATCH] [foxnews] Support Fox News Articles (closes #10598) --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/foxnews.py | 40 +++++++++++++++++++++++++++--- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 387dc7bf6..a73a35e88 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors ++ [foxnews] Support Fox News articles (#10598) * [iwara] Fix extraction after relaunch (#10462, #3215) * [newgrounds] Fix uploader extraction (#10584) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2e795260e..e9027fb69 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -292,6 +292,7 @@ from .fox import FOXIE from .foxgay import FoxgayIE from .foxnews import ( + FoxNewsVideoIE, FoxNewsIE, FoxNewsInsiderIE, ) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 5c7acd795..3e9a6a08c 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -6,7 +6,8 @@ from .common import InfoExtractor -class FoxNewsIE(AMPIE): +class FoxNewsVideoIE(AMPIE): + IE_NAME = 'foxnews:video' IE_DESC = 'Fox News and Fox Business Video' _VALID_URL = r'https?://(?P<host>video\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' _TESTS = [ @@ -66,6 +67,35 @@ def _real_extract(self, url): return info +class FoxNewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)' + IE_NAME = 'foxnews' + + _TEST = { + 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', + 'md5': '62aa5a781b308fdee212ebb6f33ae7ef', + 'info_dict': { + 'id': '5116295019001', + 'ext': 'mp4', + 'title': 'Trump and Clinton asked to defend positions on 
Iraq War', + 'description': 'Veterans react on \'The Kelly File\'', + 'timestamp': 1473299755, + 'upload_date': '20160908', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + video_id = self._html_search_regex( + r'data-video-id=([\'"])(?P<id>[^\'"]+)\1', + webpage, 'video ID', group='id') + return self.url_result( + 'http://video.foxnews.com/v/' + video_id, + FoxNewsVideoIE.ie_key()) + + class FoxNewsInsiderIE(InfoExtractor): _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)' IE_NAME = 'foxnews:insider' @@ -83,7 +113,11 @@ class FoxNewsInsiderIE(InfoExtractor): 'upload_date': '20160825', 'thumbnail': 're:^https?://.*\.jpg$', }, - 'add_ie': [FoxNewsIE.ie_key()], + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': [FoxNewsVideoIE.ie_key()], } def _real_extract(self, url): @@ -98,7 +132,7 @@ def _real_extract(self, url): return { '_type': 'url_transparent', - 'ie_key': FoxNewsIE.ie_key(), + 'ie_key': FoxNewsVideoIE.ie_key(), 'url': embed_url, 'display_id': display_id, 'title': title,