[dropout] Add extractor (#2045)

Authored-by: TwoThousandHedgehogs, pukkandan
This commit is contained in:
Sonic 2021-12-24 06:49:33 -05:00 committed by GitHub
parent a709d87335
commit e9efb99f66
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 216 additions and 0 deletions

212
yt_dlp/extractor/dropout.py Normal file
View File

@ -0,0 +1,212 @@
# coding: utf-8
from .common import InfoExtractor
from .vimeo import VHXEmbedIE
from ..utils import (
clean_html,
ExtractorError,
get_element_by_class,
get_element_by_id,
get_elements_by_class,
int_or_none,
join_nonempty,
unified_strdate,
urlencode_postdata,
)
class DropoutIE(InfoExtractor):
_LOGIN_URL = 'https://www.dropout.tv/login'
_NETRC_MACHINE = 'dropout'
_VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?:[^/]+/)*videos/(?P<id>[^/]+)/?$'
_TESTS = [
{
'url': 'https://www.dropout.tv/game-changer/season:2/videos/yes-or-no',
'note': 'Episode in a series',
'md5': '5e000fdfd8d8fa46ff40456f1c2af04a',
'info_dict': {
'id': '738153',
'display_id': 'yes-or-no',
'ext': 'mp4',
'title': 'Yes or No',
'description': 'Ally, Brennan, and Zac are asked a simple question, but is there a correct answer?',
'release_date': '20200508',
'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/351e3f24-c4a3-459a-8b79-dc80f1e5b7fd.jpg',
'series': 'Game Changer',
'season_number': 2,
'season': 'Season 2',
'episode_number': 6,
'episode': 'Yes or No',
'duration': 1180,
'uploader_id': 'user80538407',
'uploader_url': 'https://vimeo.com/user80538407',
'uploader': 'OTT Videos'
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest']
},
{
'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1/videos/episode-1',
'note': 'Episode in a series (missing release_date)',
'md5': '712caf7c191f1c47c8f1879520c2fa5c',
'info_dict': {
'id': '320562',
'display_id': 'episode-1',
'ext': 'mp4',
'title': 'The Beginning Begins',
'description': 'The cast introduces their PCs, including a neurotic elf, a goblin PI, and a corn-worshipping cleric.',
'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/4421ed0d-f630-4c88-9004-5251b2b8adfa.jpg',
'series': 'Dimension 20: Fantasy High',
'season_number': 1,
'season': 'Season 1',
'episode_number': 1,
'episode': 'The Beginning Begins',
'duration': 6838,
'uploader_id': 'user80538407',
'uploader_url': 'https://vimeo.com/user80538407',
'uploader': 'OTT Videos'
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest']
},
{
'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special',
'note': 'Episode not in a series',
'md5': 'c30fa18999c5880d156339f13c953a26',
'info_dict': {
'id': '1915774',
'display_id': 'misfits-magic-holiday-special',
'ext': 'mp4',
'title': 'Misfits & Magic Holiday Special',
'description': 'The magical misfits spend Christmas break at Gowpenny, with an unwelcome visitor.',
'release_date': '20211215',
'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/d91ea8a6-b250-42ed-907e-b30fb1c65176-8e24b8e5.jpg',
'duration': 11698,
'uploader_id': 'user80538407',
'uploader_url': 'https://vimeo.com/user80538407',
'uploader': 'OTT Videos'
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest']
}
]
def _get_authenticity_token(self, display_id):
signin_page = self._download_webpage(
self._LOGIN_URL, display_id, note='Getting authenticity token')
return self._html_search_regex(
r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']',
signin_page, 'authenticity_token')
def _login(self, display_id):
username, password = self._get_login_info()
if not (username and password):
self.raise_login_required(method='password')
response = self._download_webpage(
self._LOGIN_URL, display_id, note='Logging in', data=urlencode_postdata({
'email': username,
'password': password,
'authenticity_token': self._get_authenticity_token(display_id),
'utf8': True
}))
user_has_subscription = self._search_regex(
r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none')
if user_has_subscription.lower() == 'true':
return response
elif user_has_subscription.lower() == 'false':
raise ExtractorError('Account is not subscribed')
else:
raise ExtractorError('Incorrect username/password')
def _real_extract(self, url):
display_id = self._match_id(url)
try:
self._login(display_id)
webpage = self._download_webpage(url, display_id, note='Downloading video webpage')
finally:
self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out')
embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
thumbnail = self._og_search_thumbnail(webpage)
watch_info = get_element_by_id('watch-info', webpage) or ''
title = clean_html(get_element_by_class('video-title', watch_info))
season_episode = get_element_by_class(
'site-font-secondary-color', get_element_by_class('text', watch_info))
episode_number = int_or_none(self._search_regex(
r'Episode (\d+)', season_episode or '', 'episode', default=None))
return {
'_type': 'url_transparent',
'ie_key': VHXEmbedIE.ie_key(),
'url': embed_url,
'id': self._search_regex(r'embed.vhx.tv/videos/(.+?)\?', embed_url, 'id'),
'display_id': display_id,
'title': title,
'description': self._html_search_meta('description', webpage, fatal=False),
'thumbnail': thumbnail.split('?')[0] if thumbnail else None, # Ignore crop/downscale
'series': clean_html(get_element_by_class('series-title', watch_info)),
'episode_number': episode_number,
'episode': title if episode_number else None,
'season_number': int_or_none(self._search_regex(
r'Season (\d+),', season_episode or '', 'season', default=None)),
'release_date': unified_strdate(self._search_regex(
r'data-meta-field-name=["\']release_dates["\'] data-meta-field-value=["\'](.+?)["\']',
watch_info, 'release date', default=None)),
}
class DropoutSeasonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:[0-9]+/?$)'
_TESTS = [
{
'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1',
'note': 'Multi-season series with the season in the url',
'playlist_count': 17,
'info_dict': {
'id': 'dimension-20-fantasy-high-season-1',
'title': 'Dimension 20 Fantasy High - Season 1'
}
},
{
'url': 'https://www.dropout.tv/dimension-20-fantasy-high',
'note': 'Multi-season series with the season not in the url',
'playlist_count': 17,
'info_dict': {
'id': 'dimension-20-fantasy-high-season-1',
'title': 'Dimension 20 Fantasy High - Season 1'
}
},
{
'url': 'https://www.dropout.tv/dimension-20-shriek-week',
'note': 'Single-season series',
'playlist_count': 4,
'info_dict': {
'id': 'dimension-20-shriek-week-season-1',
'title': 'Dimension 20 Shriek Week - Season 1'
}
}
]
def _real_extract(self, url):
season_id = self._match_id(url)
season_title = season_id.replace('-', ' ').title()
webpage = self._download_webpage(url, season_id)
entries = [
self.url_result(
url=self._search_regex(r'<a href=["\'](.+?)["\'] class=["\']browse-item-link["\']',
item, 'item_url'),
ie=DropoutIE.ie_key()
) for item in get_elements_by_class('js-collection-item', webpage)
]
seasons = (get_element_by_class('select-dropdown-wrapper', webpage) or '').strip().replace('\n', '')
current_season = self._search_regex(r'<option[^>]+selected>([^<]+)</option>',
seasons, 'current_season', default='').strip()
return {
'_type': 'playlist',
'id': join_nonempty(season_id, current_season.lower().replace(' ', '-')),
'title': join_nonempty(season_title, current_season, delim=' - '),
'entries': entries
}

View File

@ -385,6 +385,10 @@ from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE from .dispeak import DigitallySpeakingIE
from .doodstream import DoodStreamIE from .doodstream import DoodStreamIE
from .dropbox import DropboxIE from .dropbox import DropboxIE
from .dropout import (
DropoutSeasonIE,
DropoutIE
)
from .dw import ( from .dw import (
DWIE, DWIE,
DWArticleIE, DWArticleIE,