[extractor/TubeTuGraz] Add extractor (#2397)

Based on https://github.com/ytdl-org/youtube-dl/pull/26778
Authored by: Ferdi265, pukkandan
This commit is contained in:
Ferdinand Bachmann 2022-07-15 12:48:21 +02:00 committed by GitHub
parent 6edf28081f
commit 49afc1d84a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 235 additions and 0 deletions

View File

@ -1794,6 +1794,7 @@ from .trueid import TrueIDIE
from .trunews import TruNewsIE from .trunews import TruNewsIE
from .trutv import TruTVIE from .trutv import TruTVIE
from .tube8 import Tube8IE from .tube8 import Tube8IE
from .tubetugraz import TubeTuGrazIE, TubeTuGrazSeriesIE
from .tubitv import ( from .tubitv import (
TubiTvIE, TubiTvIE,
TubiTvShowIE, TubiTvShowIE,

View File

@ -0,0 +1,234 @@
from .common import InfoExtractor
from ..utils import (
float_or_none,
parse_resolution,
traverse_obj,
urlencode_postdata,
variadic,
)
class TubeTuGrazBaseIE(InfoExtractor):
    """Shared login, metadata and format handling for tube.tugraz.at extractors."""

    _NETRC_MACHINE = 'tubetugraz'

    _API_EPISODE = 'https://tube.tugraz.at/search/episode.json'
    # Track types probed for the guessed Wowza manifests. The first entry is
    # the only one that keeps its default preference in _set_format_type.
    _FORMAT_TYPES = ('presentation', 'presenter')

    def _perform_login(self, username, password):
        """Submit the credentials to the Shibboleth login form.

        Every failure is non-fatal: a request that cannot be completed is
        reported through its ``errnote``, and ending up anywhere other than
        the paella index page only produces a warning.
        """
        login_page = self._request_webpage(
            'https://tube.tugraz.at/Shibboleth.sso/Login?target=/paella/ui/index.html',
            None, fatal=False, note='downloading login page', errnote='unable to fetch login page')
        if not login_page:
            return

        # The credentials are posted to whatever URL the first request ended
        # up on, with that same URL as referer.
        form_url = login_page.geturl()
        credentials = urlencode_postdata({
            'lang': 'de',
            '_eventId_proceed': '',
            'j_username': username,
            'j_password': password
        })
        response = self._request_webpage(
            form_url, None, fatal=False, headers={'referer': form_url},
            note='logging in', errnote='unable to log in', data=credentials)
        if not response:
            return

        if response.geturl() != 'https://tube.tugraz.at/paella/ui/index.html':
            self.report_warning('unable to login: incorrect password')

    def _extract_episode(self, episode_info):
        """Build an info dict from one episode entry of the search API.

        Each metadata field is read from the ``mediapackage`` sub-object first
        and falls back to the top-level ``dc*`` key of the entry.
        """
        video_id = episode_info.get('id')

        tracks = traverse_obj(episode_info, ('mediapackage', 'media', 'track'))
        formats = list(self._extract_formats(tracks, video_id))
        self._sort_formats(formats)

        title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle')
        series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle'))

        # The creator entry may be a single value or a list; variadic() lets
        # both be joined the same way.
        creators = traverse_obj(
            episode_info, ('mediapackage', 'creators', 'creator'), 'dcCreator', default='')
        creator = ', '.join(variadic(creators))

        info = {
            'id': video_id,
            'title': title,
            'creator': creator or None,
            # NOTE(review): passed through unconverted; confirm the unit the
            # API reports matches what consumers of 'duration' expect.
            'duration': traverse_obj(episode_info, ('mediapackage', 'duration'), 'dcExtent'),
            'series': series_title,
            'series_id': traverse_obj(episode_info, ('mediapackage', 'series'), 'dcIsPartOf'),
            # Only set an episode name when the video belongs to a series.
            'episode': series_title and title,
            'formats': formats
        }
        return info

    def _set_format_type(self, formats, type):
        """Label each format with its track type and return ``formats``.

        The format dicts are modified in place. Formats whose type does not
        start with the first entry of ``_FORMAT_TYPES`` get preference -2.
        """
        primary_type = self._FORMAT_TYPES[0]
        for fmt in formats:
            fmt['format_note'] = type
            if type.startswith(primary_type):
                continue
            fmt['preference'] = -2
        return formats

    def _extract_formats(self, format_list, id):
        """Yield format dicts for the listed tracks, then for guessed manifests.

        ``format_list`` may be None or empty. After the listed tracks, HLS
        and/or DASH manifests are probed at a fixed Wowza URL pattern for each
        entry of ``_FORMAT_TYPES``, but only for the transports that no listed
        track provided.
        """
        has_hls = has_dash = False

        for track in format_list or []:
            track_url = traverse_obj(track, ('tags', 'url'), 'url')
            if track_url is None:
                continue

            track_type = track.get('type') or 'unknown'
            transport = (track.get('transport') or 'https').lower()

            if transport == 'https':
                direct = {
                    'url': track_url,
                    'abr': float_or_none(traverse_obj(track, ('audio', 'bitrate')), 1000),
                    'vbr': float_or_none(traverse_obj(track, ('video', 'bitrate')), 1000),
                    'fps': traverse_obj(track, ('video', 'framerate')),
                }
                direct.update(parse_resolution(traverse_obj(track, ('video', 'resolution'))))
                formats = [direct]
            elif transport == 'hls':
                formats = self._extract_m3u8_formats(
                    track_url, id, 'mp4', fatal=False, note=f'downloading {track_type} HLS manifest')
                has_hls = True
            elif transport == 'dash':
                formats = self._extract_mpd_formats(
                    track_url, id, fatal=False, note=f'downloading {track_type} DASH manifest')
                has_dash = True
            else:
                # RTMP, HDS, SMOOTH, and unknown formats
                # - RTMP url fails on every tested entry until now
                # - HDS url 404's on every tested entry until now
                # - SMOOTH url 404's on every tested entry until now
                continue

            yield from self._set_format_type(formats, track_type)

        # TODO: Add test for these
        for format_type in self._FORMAT_TYPES:
            if not has_hls:
                guessed_hls = self._extract_m3u8_formats(
                    f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{format_type}.smil/playlist.m3u8',
                    id, 'mp4', fatal=False, note=f'Downloading {format_type} HLS manifest', errnote=False) or []
                yield from self._set_format_type(guessed_hls, format_type)

            if not has_dash:
                guessed_dash = self._extract_mpd_formats(
                    f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{format_type}.smil/manifest_mpm4sav_mvlist.mpd',
                    id, fatal=False, note=f'Downloading {format_type} DASH manifest', errnote=False)
                yield from self._set_format_type(guessed_dash, format_type)
class TubeTuGrazIE(TubeTuGrazBaseIE):
    """Extractor for a single tube.tugraz.at episode (paella ``watch.html`` page)."""

    IE_DESC = 'tube.tugraz.at'

    # The dot in "watch\.html" is escaped so that only a literal "." matches,
    # in line with the "browse\.html" pattern used for series URLs.
    _VALID_URL = r'''(?x)
        https?://tube\.tugraz\.at/paella/ui/watch\.html\?id=
        (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
    '''
    _TESTS = [
        {
            'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=f2634392-e40e-4ac7-9ddc-47764aa23d40',
            'md5': 'a23a3d5c9aaca2b84932fdba66e17145',
            'info_dict': {
                'id': 'f2634392-e40e-4ac7-9ddc-47764aa23d40',
                'ext': 'mp4',
                'title': '#6 (23.11.2017)',
                'episode': '#6 (23.11.2017)',
                'series': '[INB03001UF] Einführung in die strukturierte Programmierung',
                'creator': 'Safran C',
                'duration': 3295818,
                'series_id': 'b1192fff-2aa7-4bf0-a5cf-7b15c3bd3b34',
            }
        }, {
            'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=2df6d787-e56a-428d-8ef4-d57f07eef238',
            'md5': 'de0d854a56bf7318d2b693fe1adb89a5',
            'info_dict': {
                'id': '2df6d787-e56a-428d-8ef4-d57f07eef238',
                'title': 'TubeTuGraz video #2df6d787-e56a-428d-8ef4-d57f07eef238',
                'ext': 'mp4',
            },
            'expected_warnings': ['Extractor failed to obtain "title"'],
        }
    ]

    def _real_extract(self, url):
        """Look up the episode in the search API and build its info dict."""
        video_id = self._match_id(url)
        episode_data = self._download_json(
            self._API_EPISODE, video_id, query={'id': video_id, 'limit': 1}, note='Downloading episode metadata')

        # When the search returns no result, fall back to an entry that only
        # carries the id, so extraction can still proceed without metadata
        # (the second test case expects exactly that).
        episode_info = traverse_obj(episode_data, ('search-results', 'result'), default={'id': video_id})
        return self._extract_episode(episode_info)
class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
    """Extractor for a tube.tugraz.at series (paella ``browse.html`` page) as a playlist."""

    _VALID_URL = r'''(?x)
        https?://tube\.tugraz\.at/paella/ui/browse\.html\?series=
        (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
    '''
    _TESTS = [{
        'url': 'https://tube.tugraz.at/paella/ui/browse.html?series=0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
        'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
        'info_dict': {
            'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
            'title': '[209351] Strassenwesen',
        },
        'playlist': [
            {
                'info_dict': {
                    'id': 'ee17ce5d-34e2-48b7-a76a-fed148614e11',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#4 Detailprojekt',
                    'episode': '#4 Detailprojekt',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 6127024,
                }
            },
            {
                'info_dict': {
                    'id': '87350498-799a-44d3-863f-d1518a98b114',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#3 Generelles Projekt',
                    'episode': '#3 Generelles Projekt',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 5374422,
                }
            },
            {
                'info_dict': {
                    'id': '778599ea-489e-4189-9e05-3b4888e19bcd',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#2 Vorprojekt',
                    'episode': '#2 Vorprojekt',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 5566404,
                }
            },
            {
                'info_dict': {
                    'id': '75e4c71c-d99d-4e56-b0e6-4f2bcdf11f29',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#1 Variantenstudium',
                    'episode': '#1 Variantenstudium',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 5420200,
                }
            }
        ],
        'min_playlist_count': 4
    }]

    def _real_extract(self, url):
        """Return a playlist with every episode of the series.

        The series title comes from a separate, non-fatal metadata request;
        if that request fails the playlist is returned without a title.
        """
        series_id = self._match_id(url)
        episodes_data = self._download_json(
            self._API_EPISODE, series_id, query={'sid': series_id}, note='Downloading episode list')
        series_data = self._download_json(
            'https://tube.tugraz.at/series/series.json', series_id, fatal=False,
            note='downloading series metadata', errnote='failed to download series metadata',
            query={
                'seriesId': series_id,
                'count': 1,
                'sort': 'TITLE'
            })

        # A missing result yields an empty playlist instead of a KeyError.
        episodes = traverse_obj(episodes_data, ('search-results', 'result'), default=[])
        # TubeTuGrazIE consumes 'result' from this same endpoint as one bare
        # mapping when a single episode matches, so a one-episode series
        # presumably arrives in that shape too (TODO confirm against the API).
        # Wrap it so the entries are episodes rather than dict keys.
        if isinstance(episodes, dict):
            episodes = [episodes]

        return self.playlist_result(
            map(self._extract_episode, episodes), series_id,
            traverse_obj(series_data, ('catalogs', 0, 'http://purl.org/dc/terms/', 'title', 0, 'value')))