[bbc] Fix extraction of news articles (#2811)

Closes #1374
Authored by: ajj8
2024-06-20 06:26:49 +02:00 · 2022-02-17 15:54:53 +00:00 · 2022-02-17 15:54:53 +00:00 · ac184ab742
commit ac184ab742
parent 5c10453827
1 changed file with 4 additions and 4 deletions
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -1171,9 +1171,9 @@ def _real_extract(self, url):
                return self.playlist_result(
                    entries, playlist_id, playlist_title, playlist_description)

-        initial_data = self._parse_json(self._search_regex(
-            r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
-            'preload state', default='{}'), playlist_id, fatal=False)
+        initial_data = self._parse_json(self._parse_json(self._search_regex(
+            r'window\.__INITIAL_DATA__\s*=\s*("{.+?}");', webpage,
+            'preload state', default='"{}"'), playlist_id, fatal=False), playlist_id, fatal=False)
        if initial_data:
            def parse_media(media):
                if not media:
@@ -1214,7 +1214,7 @@ def parse_media(media):
                if name == 'media-experience':
                    parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
                elif name == 'article':
-                    for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
+                    for block in (try_get(resp, lambda x: x['data']['content']['model']['blocks'], list) or []):
                        if block.get('type') != 'media':
                            continue
                        parse_media(block.get('model'))