From f8f80e428b61d0b2b7c5b807f67b896f5d0aedf6 Mon Sep 17 00:00:00 2001 From: pasha Date: Tue, 19 Sep 2023 13:06:07 -0700 Subject: [PATCH 1/3] mojevideo Add extractor --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/mojevideo.py | 44 +++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 yt_dlp/extractor/mojevideo.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 490b010b8..802a9804d 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1123,6 +1123,7 @@ MofosexEmbedIE, ) from .mojvideo import MojvideoIE +from .mojevideo import MojevideoIE from .morningstar import MorningstarIE from .motherless import ( MotherlessIE, diff --git a/yt_dlp/extractor/mojevideo.py b/yt_dlp/extractor/mojevideo.py new file mode 100644 index 000000000..d9c86e035 --- /dev/null +++ b/yt_dlp/extractor/mojevideo.py @@ -0,0 +1,44 @@ +import re +from .common import InfoExtractor +from ..utils import ExtractorError + + +class MojevideoIE(InfoExtractor): + _VALID_URL = r'https://www\.mojevideo\.sk/video/(?P<id>[0-9]+)' + + _TESTS = [{ + 'url': 'https://www.mojevideo.sk/video/3d17c/chlapci_dobetonovali_sme_mame_hotovo.html', + 'md5': '384a4628bd2bbd261c5206cf77c38c17', + 'info_dict': { + 'id': '250236', + 'ext': 'mp4', + 'title': 'Chlapci dobetónovali sme, máme hotovo!', + 'description': 'Celodenná práca bola za pár sekúnd fuč. Betón stiekol k susedovi, kam aj zrútil celý plot, ktorý polámal aj tuje. Chlapom zostali iba oči pre plač.' + } + }] + + def _real_extract(self, url): + webpage = self._download_webpage(url, 1) + + video_id = re.search(r'vId=(\d+)', webpage).group(1) + video_expiration = re.search(r"vEx='(\d+)'", webpage).group(1) + video_hash = re.search(r'vHash=\[([^\]]+)', webpage).group(1).split(",")[0].replace("'", "") + video_title = re.search(r'

(.*?)

', webpage).group(1) + video_description = re.search(r'
.*?

(.*?)

', webpage, re.DOTALL).group(1) + + info = {} + video_url = "https://cache01.mojevideo.sk/securevideos69/" + video_id + ".mp4?md5=" + video_hash + "&expires=" + video_expiration + if video_url: + print(video_id) + info = { + 'id': video_id, + 'url': video_url, + 'title': video_title, + 'description': video_description + } + if not info: + raise ExtractorError('No videos found on webpage', expected=True) + + return { + **info, + } From 0b6daccd26382b20a602493a38859b5b5db6db3d Mon Sep 17 00:00:00 2001 From: pasha Date: Wed, 20 Sep 2023 10:40:59 -0700 Subject: [PATCH 2/3] removed unnecessary debug print --- yt_dlp/extractor/mojevideo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt_dlp/extractor/mojevideo.py b/yt_dlp/extractor/mojevideo.py index d9c86e035..b1fd54ad2 100644 --- a/yt_dlp/extractor/mojevideo.py +++ b/yt_dlp/extractor/mojevideo.py @@ -29,7 +29,6 @@ def _real_extract(self, url): info = {} video_url = "https://cache01.mojevideo.sk/securevideos69/" + video_id + ".mp4?md5=" + video_hash + "&expires=" + video_expiration if video_url: - print(video_id) info = { 'id': video_id, 'url': video_url, From 96142ae31b3173920f95d9b971a70c2a1f16e66b Mon Sep 17 00:00:00 2001 From: pasha Date: Sat, 23 Sep 2023 17:59:14 -0700 Subject: [PATCH 3/3] update mojevideo.py --- yt_dlp/extractor/mojevideo.py | 54 +++++++++++++++-------------------- 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/yt_dlp/extractor/mojevideo.py b/yt_dlp/extractor/mojevideo.py index b1fd54ad2..12c0cf67c 100644 --- a/yt_dlp/extractor/mojevideo.py +++ b/yt_dlp/extractor/mojevideo.py @@ -1,43 +1,35 @@ -import re from .common import InfoExtractor -from ..utils import ExtractorError class MojevideoIE(InfoExtractor): - _VALID_URL = r'https://www\.mojevideo\.sk/video/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(www\.)?mojevideo\.sk/video/(?P<id>\w+)' - _TESTS = [{ - 'url': 'https://www.mojevideo.sk/video/3d17c/chlapci_dobetonovali_sme_mame_hotovo.html', - 'md5': '384a4628bd2bbd261c5206cf77c38c17', - 
'info_dict': { - 'id': '250236', - 'ext': 'mp4', - 'title': 'Chlapci dobetónovali sme, máme hotovo!', - 'description': 'Celodenná práca bola za pár sekúnd fuč. Betón stiekol k susedovi, kam aj zrútil celý plot, ktorý polámal aj tuje. Chlapom zostali iba oči pre plač.' + _TESTS = [ + { + 'url': 'https://www.mojevideo.sk/video/3d17c/chlapci_dobetonovali_sme_mame_hotovo.html', + 'md5': '384a4628bd2bbd261c5206cf77c38c17', + 'info_dict': { + 'id': '250236', + 'ext': 'mp4', + 'title': 'Chlapci dobetónovali sme, máme hotovo! - Mojevideo', + 'description': 'Celodenná práca bola za pár sekúnd fuč. Betón stiekol k susedovi, kam aj zrútil celý plot, ktorý polámal aj tuje....' + } } - }] + ] def _real_extract(self, url): - webpage = self._download_webpage(url, 1) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) - video_id = re.search(r'vId=(\d+)', webpage).group(1) - video_expiration = re.search(r"vEx='(\d+)'", webpage).group(1) - video_hash = re.search(r'vHash=\[([^\]]+)', webpage).group(1).split(",")[0].replace("'", "") - video_title = re.search(r'

(.*?)

', webpage).group(1) - video_description = re.search(r'
.*?

(.*?)

', webpage, re.DOTALL).group(1) - - info = {} - video_url = "https://cache01.mojevideo.sk/securevideos69/" + video_id + ".mp4?md5=" + video_hash + "&expires=" + video_expiration - if video_url: - info = { - 'id': video_id, - 'url': video_url, - 'title': video_title, - 'description': video_description - } - if not info: - raise ExtractorError('No videos found on webpage', expected=True) + v_id = self._search_regex(r'\bvId=(\d+)', webpage, 'video id') + v_exp = self._search_regex(r'\bvEx=\'(\d+)', webpage, 'expiry') + v_hash = self._search_regex(r'\bvHash=\[([^\]]+)', webpage, 'hash').split(",")[0].replace("'", "") + v_title = self._html_extract_title(webpage, 'title') return { - **info, + 'id': v_id, + 'url': f'https://cache01.mojevideo.sk/securevideos69/{v_id}.mp4?md5={v_hash}&expires={v_exp}', + 'title': v_title, + 'description': self._og_search_description(webpage, default=None), + }