From f171bc8b59ecf4560dd4076be56570a4f090d519 Mon Sep 17 00:00:00 2001
From: Purdea Andrei <andrei@purdea.ro>
Date: Tue, 28 Jul 2015 18:14:06 +0300
Subject: [PATCH 1/5] [youtube] save keywords in info JSON when
 --write-info-json is used

---
 youtube_dl/extractor/youtube.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 0e411bfb6..15e327ec8 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1072,6 +1072,9 @@ def add_dash_mpd(video_info):
         else:
             video_categories = None
 
+        m = re.findall(r'''<meta(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?\s+property=['"]?og:video:tag['"]?(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?\s+content=['"]?([^>'"]+?)['"]?\s*>'''
+        , video_webpage, re.DOTALL | re.IGNORECASE);
+        video_tags = ", ".join(m)
         # description
         video_description = get_element_by_id("eow-description", video_webpage)
         if video_description:
@@ -1259,6 +1262,7 @@ def _map_to_format_list(urlmap):
             'title': video_title,
             'thumbnail': video_thumbnail,
             'description': video_description,
+            'tags' : video_tags,
             'categories': video_categories,
             'subtitles': video_subtitles,
             'automatic_captions': automatic_captions,

From a6f774e9015995393a086273df8db1d7b0c098c4 Mon Sep 17 00:00:00 2001
From: Purdea Andrei <andrei@purdea.ro>
Date: Tue, 28 Jul 2015 18:29:13 +0300
Subject: [PATCH 2/5] [youtube]: tags key in info JSON is now a list

---
 youtube_dl/extractor/youtube.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 15e327ec8..c0fafbfd5 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1072,9 +1072,8 @@ def add_dash_mpd(video_info):
         else:
             video_categories = None
 
-        m = re.findall(r'''<meta(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?\s+property=['"]?og:video:tag['"]?(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?\s+content=['"]?([^>'"]+?)['"]?\s*>'''
+        video_tags = re.findall(r'''<meta(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?\s+property=['"]?og:video:tag['"]?(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?\s+content=['"]?([^>'"]+?)['"]?\s*>'''
         , video_webpage, re.DOTALL | re.IGNORECASE);
-        video_tags = ", ".join(m)
         # description
         video_description = get_element_by_id("eow-description", video_webpage)
         if video_description:

From 5316bf7487b608b7c085950ff2fb0444f2c36dc0 Mon Sep 17 00:00:00 2001
From: Purdea Andrei <andrei@purdea.ro>
Date: Tue, 28 Jul 2015 18:30:42 +0300
Subject: [PATCH 3/5] Documented tags as a possible dict key

---
 youtube_dl/extractor/common.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 14b9b4fe2..a227aeb9c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -187,6 +187,7 @@ class InfoExtractor(object):
                     specified in the URL.
     end_time:       Time in seconds where the reproduction should end, as
                     specified in the URL.
+    tags:           A list of keywords attached to the video.
 
     Unless mentioned otherwise, the fields should be Unicode strings.
 

From 864f24bd2c0cf9bde034812a2049c3750c1bb05c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Wed, 29 Jul 2015 03:43:03 +0600
Subject: [PATCH 4/5] [extractor/common] Add _meta_regex and clarify tags field

---
 youtube_dl/extractor/common.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a227aeb9c..d54866d1f 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -181,13 +181,13 @@ class InfoExtractor(object):
                     by YoutubeDL if it's missing)
     categories:     A list of categories that the video falls in, for example
                     ["Sports", "Berlin"]
+    tags:           A list of tags assigned to the video, e.g. ["sweden", "pop music"]
     is_live:        True, False, or None (=unknown). Whether this video is a
                     live stream that goes on instead of a fixed-length video.
     start_time:     Time in seconds where the reproduction should start, as
                     specified in the URL.
     end_time:       Time in seconds where the reproduction should end, as
                     specified in the URL.
-    tags:           A list of keywords attached to the video.
 
     Unless mentioned otherwise, the fields should be Unicode strings.
 
@@ -631,6 +631,12 @@ def _og_regexes(prop):
             template % (content_re, property_re),
         ]
 
+    @staticmethod
+    def _meta_regex(prop):
+        return r'''(?isx)<meta
+                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
+                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
+
     def _og_search_property(self, prop, html, name=None, **kargs):
         if name is None:
             name = 'OpenGraph %s' % prop
@@ -661,9 +667,7 @@ def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs
         if display_name is None:
             display_name = name
         return self._html_search_regex(
-            r'''(?isx)<meta
-                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
-                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
+            self._meta_regex(name),
             html, display_name, fatal=fatal, group='content', **kwargs)
 
     def _dc_search_uploader(self, html):

From 000b6b5ae5cc214906effe4ac5b78b579bc7db70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Wed, 29 Jul 2015 03:43:32 +0600
Subject: [PATCH 5/5] [youtube] Improve tags extraction and add test

---
 youtube_dl/extractor/youtube.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index c0fafbfd5..4c449fd74 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -329,6 +329,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'upload_date': '20121002',
                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                 'categories': ['Science & Technology'],
+                'tags': ['youtube-dl'],
                 'like_count': int,
                 'dislike_count': int,
                 'start_time': 1,
@@ -343,7 +344,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'upload_date': '20120506',
                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
-                'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
+                'description': 'md5:782e8651347686cba06e58f71ab51773',
+                'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
+                         'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
+                         'iconic ep', 'iconic', 'love', 'it'],
                 'uploader': 'Icona Pop',
                 'uploader_id': 'IconaPop',
             }
@@ -1072,8 +1076,10 @@ def add_dash_mpd(video_info):
         else:
             video_categories = None
 
-        video_tags = re.findall(r'''<meta(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?\s+property=['"]?og:video:tag['"]?(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?\s+content=['"]?([^>'"]+?)['"]?\s*>'''
-        , video_webpage, re.DOTALL | re.IGNORECASE);
+        video_tags = [
+            unescapeHTML(m.group('content'))
+            for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
+
         # description
         video_description = get_element_by_id("eow-description", video_webpage)
         if video_description:
@@ -1261,8 +1267,8 @@ def _map_to_format_list(urlmap):
             'title': video_title,
             'thumbnail': video_thumbnail,
             'description': video_description,
-            'tags' : video_tags,
             'categories': video_categories,
+            'tags': video_tags,
             'subtitles': video_subtitles,
             'automatic_captions': automatic_captions,
             'duration': video_duration,