From f494ddada865afc8af916225325d2dd00c5ca233 Mon Sep 17 00:00:00 2001
From: "Lesmiscore (Naoya Ozaki)" <nao20010128@gmail.com>
Date: Thu, 3 Mar 2022 02:08:13 +0900
Subject: [PATCH] [niconico] Add playlist extractors and refactor (#2915)

Authored by: Lesmiscore
---
 yt_dlp/extractor/extractors.py |   3 +-
 yt_dlp/extractor/niconico.py   | 183 +++++++++++++++++++++++++++------
 2 files changed, 153 insertions(+), 33 deletions(-)
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 1a94c8b75..0f26dc24f 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1011,11 +1011,12 @@ from .nick import (
     NickNightIE,
     NickRuIE,
 )
-
 from .niconico import (
     NiconicoIE,
     NiconicoPlaylistIE,
     NiconicoUserIE,
+    NiconicoSeriesIE,
+    NiconicoHistoryIE,
     NicovideoSearchDateIE,
     NicovideoSearchIE,
     NicovideoSearchURLIE,
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 6e561bee5..8f56fc95b 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 
 import datetime
 import itertools
+import functools
 import json
 import re
 
@@ -12,6 +13,7 @@ from ..compat import (
     compat_str,
     compat_parse_qs,
     compat_urllib_parse_urlparse,
+    compat_HTTPError,
 )
 from ..utils import (
     ExtractorError,
@@ -24,7 +26,9 @@ from ..utils import (
     PostProcessingError,
     remove_start,
     str_or_none,
+    traverse_obj,
     try_get,
+    unescapeHTML,
     unified_timestamp,
     urlencode_postdata,
     xpath_text,
@@ -606,8 +610,61 @@ class NiconicoIE(InfoExtractor):
         }
 
 
-class NiconicoPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
+class NiconicoPlaylistBaseIE(InfoExtractor):
+    _PAGE_SIZE = 100
+
+    _API_HEADERS = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0',
+        'X-Niconico-Language': 'en-us'
+    }
+
+    def _call_api(self, list_id, resource, query):
+        "Implement this in child class"
+        pass
+
+    @staticmethod
+    def _parse_owner(item):
+        return {
+            'uploader': traverse_obj(item, ('owner', 'name')),
+            'uploader_id': traverse_obj(item, ('owner', 'id')),
+        }
+
+    def _fetch_page(self, list_id, page):
+        page += 1
+        resp = self._call_api(list_id, 'page %d' % page, {
+            'page': page,
+            'pageSize': self._PAGE_SIZE,
+        })
+        # this is needed to support both mylist and user
+        for video in traverse_obj(resp, ('items', ..., ('video', None))) or []:
+            video_id = video.get('id')
+            if not video_id:
+                # skip {"video": {"id": "blablabla", ...}}
+                continue
+            count = video.get('count') or {}
+            get_count = lambda x: int_or_none(count.get(x))
+            yield {
+                '_type': 'url',
+                'id': video_id,
+                'title': video.get('title'),
+                'url': f'https://www.nicovideo.jp/watch/{video_id}',
+                'description': video.get('shortDescription'),
+                'duration': int_or_none(video.get('duration')),
+                'view_count': get_count('view'),
+                'comment_count': get_count('comment'),
+                'thumbnail': traverse_obj(video, ('thumbnail', ('nHdUrl', 'largeUrl', 'listingUrl', 'url'))),
+                'ie_key': NiconicoIE.ie_key(),
+                **self._parse_owner(video),
+            }
+
+    def _entries(self, list_id):
+        return OnDemandPagedList(functools.partial(self._fetch_page, list_id), self._PAGE_SIZE)
+
+
+class NiconicoPlaylistIE(NiconicoPlaylistBaseIE):
+    IE_NAME = 'niconico:playlist'
+    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/(?:user/\d+/)?(?:my/)?mylist/(?:#/)?(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.nicovideo.jp/mylist/27411728',
@@ -618,48 +675,110 @@ class NiconicoPlaylistIE(InfoExtractor):
             'uploader': 'のっく',
             'uploader_id': '805442',
         },
-        'playlist_mincount': 225,
+        'playlist_mincount': 291,
     }, {
         'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
         'only_matching': True,
+    }, {
+        'url': 'https://www.nicovideo.jp/my/mylist/#/68048635',
+        'only_matching': True,
     }]
 
-    _API_HEADERS = {
-        'X-Frontend-ID': '6',
-        'X-Frontend-Version': '0'
-    }
+    def _call_api(self, list_id, resource, query):
+        return self._download_json(
+            f'https://nvapi.nicovideo.jp/v2/mylists/{list_id}', list_id,
+            f'Downloading {resource}', query=query,
+            headers=self._API_HEADERS)['data']['mylist']
 
     def _real_extract(self, url):
         list_id = self._match_id(url)
+        mylist = self._call_api(list_id, 'list', {
+            'pageSize': 1,
+        })
+        return self.playlist_result(
+            self._entries(list_id), list_id,
+            mylist.get('name'), mylist.get('description'), **self._parse_owner(mylist))
 
-        def get_page_data(pagenum, pagesize):
-            return self._download_json(
-                'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
-                query={'page': 1 + pagenum, 'pageSize': pagesize},
-                headers=self._API_HEADERS).get('data').get('mylist')
 
-        data = get_page_data(0, 1)
-        title = data.get('name')
-        description = data.get('description')
-        uploader = data.get('owner').get('name')
-        uploader_id = data.get('owner').get('id')
+class NiconicoSeriesIE(InfoExtractor):
+    IE_NAME = 'niconico:series'
+    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/series/(?P<id>\d+)'
 
-        def pagefunc(pagenum):
-            data = get_page_data(pagenum, 25)
-            return ({
-                '_type': 'url',
-                'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
-            } for item in data.get('items'))
+    _TESTS = [{
+        'url': 'https://www.nicovideo.jp/series/110226',
+        'info_dict': {
+            'id': '110226',
+            'title': 'ご立派ァ！のシリーズ',
+        },
+        'playlist_mincount': 10,  # as of 2021/03/17
+    }, {
+        'url': 'https://www.nicovideo.jp/series/12312/',
+        'info_dict': {
+            'id': '12312',
+            'title': 'バトルスピリッツ　お勧めカード紹介(調整中)',
+        },
+        'playlist_mincount': 97,  # as of 2021/03/17
+    }, {
+        'url': 'https://nico.ms/series/203559',
+        'only_matching': True,
+    }]
 
-        return {
-            '_type': 'playlist',
-            'id': list_id,
-            'title': title,
-            'description': description,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'entries': OnDemandPagedList(pagefunc, 25),
-        }
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+        webpage = self._download_webpage(f'https://www.nicovideo.jp/series/{list_id}', list_id)
+
+        title = self._search_regex(
+            (r'<title>「(.+)（全',
+             r'<div class="TwitterShareButton"\s+data-text="(.+)\s+https:'),
+            webpage, 'title', fatal=False)
+        if title:
+            title = unescapeHTML(title)
+        playlist = [
+            self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id)
+            for v_id in re.findall(r'href="/watch/([a-z0-9]+)" data-href="/watch/\1', webpage)]
+        return self.playlist_result(playlist, list_id, title)
+
+
+class NiconicoHistoryIE(NiconicoPlaylistBaseIE):
+    IE_NAME = 'niconico:history'
+    IE_DESC = 'NicoNico user history. Requires cookies.'
+    _VALID_URL = r'https?://(?:www\.|sp\.)?nicovideo\.jp/my/history'
+
+    _TESTS = [{
+        'note': 'PC page, with /video',
+        'url': 'https://www.nicovideo.jp/my/history/video',
+        'only_matching': True,
+    }, {
+        'note': 'PC page, without /video',
+        'url': 'https://www.nicovideo.jp/my/history',
+        'only_matching': True,
+    }, {
+        'note': 'mobile page, with /video',
+        'url': 'https://sp.nicovideo.jp/my/history/video',
+        'only_matching': True,
+    }, {
+        'note': 'mobile page, without /video',
+        'url': 'https://sp.nicovideo.jp/my/history',
+        'only_matching': True,
+    }]
+
+    def _call_api(self, list_id, resource, query):
+        return self._download_json(
+            'https://nvapi.nicovideo.jp/v1/users/me/watch/history', 'history',
+            f'Downloading {resource}', query=query,
+            headers=self._API_HEADERS)['data']
+
+    def _real_extract(self, url):
+        list_id = 'history'
+        try:
+            mylist = self._call_api(list_id, 'list', {
+                'pageSize': 1,
+            })
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                self.raise_login_required('You have to be logged in to get your watch history')
+            raise
+        return self.playlist_result(self._entries(list_id), list_id, **self._parse_owner(mylist))
 
 
 class NicovideoSearchBaseIE(InfoExtractor):