From 8dbe9899a985a04690e467510c94c14f3314843b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Oct 2013 06:06:30 +0200 Subject: [PATCH] Allow users to specify an age limit (fixes #1545) With these changes, users can now restrict what videos are downloaded by the intended audience, by specifying their age with --age-limit YEARS . Add rudimentary support in youtube, pornotube, and youporn. --- test/test_age_restriction.py | 53 +++++++++++++++++++++++++++++++ youtube_dl/YoutubeDL.py | 6 ++++ youtube_dl/__init__.py | 4 +++ youtube_dl/extractor/common.py | 10 ++++++ youtube_dl/extractor/pornotube.py | 4 ++- youtube_dl/extractor/youporn.py | 4 ++- youtube_dl/extractor/youtube.py | 3 +- 7 files changed, 81 insertions(+), 3 deletions(-) create mode 100644 test/test_age_restriction.py diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py new file mode 100644 index 000000000..943f9a315 --- /dev/null +++ b/test/test_age_restriction.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +import sys +import unittest + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl import YoutubeDL +from helper import try_rm + + +def _download_restricted(url, filename, age): + """ Returns true iff the file has been downloaded """ + + params = { + 'age_limit': age, + 'skip_download': True, + 'writeinfojson': True, + "outtmpl": "%(id)s.%(ext)s", + } + ydl = YoutubeDL(params) + ydl.add_default_info_extractors() + json_filename = filename + '.info.json' + try_rm(json_filename) + ydl.download([url]) + res = os.path.exists(json_filename) + try_rm(json_filename) + return res + + +class TestAgeRestriction(unittest.TestCase): + def _assert_restricted(self, url, filename, age, old_age=None): + self.assertTrue(_download_restricted(url, filename, old_age)) + self.assertFalse(_download_restricted(url, filename, age)) + + def test_youtube(self): + self._assert_restricted('07FYdnEawAQ', 
'07FYdnEawAQ.mp4', 10) + + def test_youporn(self): + self._assert_restricted( + 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', + '505835.mp4', 2, old_age=25) + + def test_pornotube(self): + self._assert_restricted( + 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', + '1689755.flv', 13) + + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2503fd09b..6258c141e 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -84,6 +84,8 @@ class YoutubeDL(object): cachedir: Location of the cache files in the filesystem. None to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. + age_limit: An integer representing the user's age in years. + Unsuitable videos for the given age are skipped. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -309,6 +311,10 @@ def _match_entry(self, info_dict): dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + age_limit = self.params.get('age_limit') + if age_limit is not None: + if age_limit < info_dict.get('age_restriction', 0): + return u'Skipping "' + title + '" because it is age restricted' return None def extract_info(self, url, download=True, ie_key=None, extra_info={}): diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 03df835f2..7a399273a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -188,6 +188,9 @@ def _hide_login_info(opts): selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None) selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None) selection.add_option('--no-playlist', 
action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) + selection.add_option('--age-limit', metavar='YEARS', dest='age_limit', + help='download only videos suitable for the given age', + default=None, type=int) authentication.add_option('-u', '--username', @@ -631,6 +634,7 @@ def _real_main(argv=None): 'daterange': date, 'cachedir': opts.cachedir, 'youtube_print_sig_code': opts.youtube_print_sig_code, + 'age_limit': opts.age_limit, }) if opts.verbose: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 69cdcdc1b..2a5a85dc6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -54,6 +54,7 @@ class InfoExtractor(object): view_count: How many users have watched the video on the platform. urlhandle: [internal] The urlHandle to be used to download the file, like returned by urllib.request.urlopen + age_limit: Age restriction for the video, as an integer (years) formats: A list of dictionaries for each format available, it must be ordered from worst to best quality. Potential fields: * url Mandatory. 
The URL of the video file @@ -318,6 +319,15 @@ def _og_search_video_url(self, html, name='video url', **kargs): self._og_regex('video')], html, name, **kargs) + def _rta_search(self, html): + # See http://www.rtalabel.org/index.php?content=howtofaq#single + if re.search(r'(?ix)Added (?P[0-9\/]+) by' upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False) if upload_date: upload_date = unified_strdate(upload_date) + age_limit = self._rta_search(webpage) info = {'id': video_id, 'url': video_url, @@ -45,6 +46,7 @@ def _real_extract(self, url): 'upload_date': upload_date, 'title': video_title, 'ext': 'flv', - 'format': 'flv'} + 'format': 'flv', + 'age_restriction': age_limit} return [info] diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index c85fd4b5a..e2860ec9d 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -51,6 +51,7 @@ def _real_extract(self, url): req = compat_urllib_request.Request(url) req.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(req, video_id) + age_limit = self._rta_search(webpage) # Get JSON parameters json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters') @@ -115,7 +116,8 @@ def _real_extract(self, url): 'ext': extension, 'format': format, 'thumbnail': thumbnail, - 'description': video_description + 'description': video_description, + 'age_restriction': age_limit, }) if self._downloader.params.get('listformats', None): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1101011ea..9bcd035bd 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1495,7 +1495,8 @@ def _real_extract(self, url): 'description': video_description, 'player_url': player_url, 'subtitles': video_subtitles, - 'duration': video_duration + 'duration': video_duration, + 'age_restriction': 18 if age_gate else 0, }) return results