[ie/loco] Fix extractor (#12934)

Closes #12930
Authored by: seproDev
[ie/rtve] Rework extractors (#10388)
2025-04-30 16:00:17 +02:00 · 2025-04-19 02:02:09 +02:00 · 2025-04-19 01:47:14 +02:00 · 2025-04-19 01:35:47 +02:00 · 2025-04-18 22:12:31 +02:00 · 2025-04-18 21:12:27 +02:00
11 changed files with 551 additions and 390 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1783,7 +1783,6 @@ from .rtvcplay import (
 from .rtve import (
    RTVEALaCartaIE,
    RTVEAudioIE,
-    RTVEInfantilIE,
    RTVELiveIE,
    RTVETelevisionIE,
 )
@@ -2237,7 +2236,10 @@ from .tvplay import (
    TVPlayIE,
 )
 from .tvplayer import TVPlayerIE
-from .tvw import TvwIE
+from .tvw import (
+    TvwIE,
+    TvwTvChannelsIE,
+)
 from .tweakers import TweakersIE
 from .twentymin import TwentyMinutenIE
 from .twentythreevideo import TwentyThreeVideoIE
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -21,6 +21,7 @@ from ..utils import (
    int_or_none,
    time_seconds,
    traverse_obj,
+    update_url,
    update_url_query,
 )

@@ -417,6 +418,10 @@ class AbemaTVIE(AbemaTVBaseIE):
            'is_live': is_live,
            'availability': availability,
        })
+
+        if thumbnail := update_url(self._og_search_thumbnail(webpage, default=''), query=None):
+            info['thumbnails'] = [{'url': thumbnail}]
+
        return info


--- a/yt_dlp/extractor/atresplayer.py
+++ b/yt_dlp/extractor/atresplayer.py
@@ -1,64 +1,105 @@
+import urllib.parse
+
 from .common import InfoExtractor
 from ..networking.exceptions import HTTPError
 from ..utils import (
    ExtractorError,
    int_or_none,
+    parse_age_limit,
+    url_or_none,
    urlencode_postdata,
 )
+from ..utils.traversal import traverse_obj


 class AtresPlayerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
+    _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/(?:[^/?#]+/){4}(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
    _NETRC_MACHINE = 'atresplayer'
-    _TESTS = [
-        {
-            'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
-            'info_dict': {
-                'id': '5d4aa2c57ed1a88fc715a615',
-                'ext': 'mp4',
-                'title': 'Capítulo 7: Asuntos pendientes',
-                'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
-                'duration': 3413,
-            },
-            'skip': 'This video is only available for registered users',
+    _TESTS = [{
+        'url': 'https://www.atresplayer.com/lasexta/programas/el-objetivo/clips/mbappe-describe-como-entrenador-a-carlo-ancelotti-sabe-cuando-tiene-que-ser-padre-jefe-amigo-entrenador_67f2dfb2fb6ab0e4c7203849/',
+        'info_dict': {
+            'ext': 'mp4',
+            'id': '67f2dfb2fb6ab0e4c7203849',
+            'display_id': 'md5:c203f8d4e425ed115ba56a1c6e4b3e6c',
+            'title': 'Mbappé describe como entrenador a Carlo Ancelotti: "Sabe cuándo tiene que ser padre, jefe, amigo, entrenador..."',
+            'channel': 'laSexta',
+            'duration': 31,
+            'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/06/B02DBE1E-D59B-4683-8404-1A9595D15269/1920x1080.jpg',
+            'tags': ['Entrevista informativa', 'Actualidad', 'Debate informativo', 'Política', 'Economía', 'Sociedad', 'Cara a cara', 'Análisis', 'Más periodismo'],
+            'series': 'El Objetivo',
+            'season': 'Temporada 12',
+            'timestamp': 1743970079,
+            'upload_date': '20250406',
        },
-        {
-            'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
-            'only_matching': True,
+    }, {
+        'url': 'https://www.atresplayer.com/antena3/programas/el-hormiguero/clips/revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero_67f836baa4a5b0e4147ca59a/',
+        'info_dict': {
+            'ext': 'mp4',
+            'id': '67f836baa4a5b0e4147ca59a',
+            'display_id': 'revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero',
+            'title': 'Revive la entrevista completa a Miguel Bosé en El Hormiguero',
+            'description': 'md5:c6d2b591408d45a7bc2986dfb938eb72',
+            'channel': 'Antena 3',
+            'duration': 2556,
+            'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/10/9076395F-F1FD-48BE-9F18-540DBA10EBAD/1920x1080.jpg',
+            'tags': ['Entrevista', 'Variedades', 'Humor', 'Entretenimiento', 'Te sigo', 'Buen rollo', 'Cara a cara'],
+            'series': 'El Hormiguero ',
+            'season': 'Temporada 14',
+            'timestamp': 1744320111,
+            'upload_date': '20250410',
        },
-        {
-            'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
-            'only_matching': True,
+    }, {
+        'url': 'https://www.atresplayer.com/flooxer/series/biara-proyecto-lazarus/temporada-1/capitulo-3-supervivientes_67a6038b64ceca00070f4f69/',
+        'info_dict': {
+            'ext': 'mp4',
+            'id': '67a6038b64ceca00070f4f69',
+            'display_id': 'capitulo-3-supervivientes',
+            'title': 'Capítulo 3: Supervivientes',
+            'description': 'md5:65b231f20302f776c2b0dd24594599a1',
+            'channel': 'Flooxer',
+            'duration': 1196,
+            'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages01/2025/02/14/17CF90D3-FE67-40C5-A941-7825B3E13992/1920x1080.jpg',
+            'tags': ['Juvenil', 'Terror', 'Piel de gallina', 'Te sigo', 'Un break', 'Del tirón'],
+            'series': 'BIARA: Proyecto Lázarus',
+            'season': 'Temporada 1',
+            'season_number': 1,
+            'episode': 'Episode 3',
+            'episode_number': 3,
+            'timestamp': 1743095191,
+            'upload_date': '20250327',
        },
-    ]
+    }, {
+        'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
+        'only_matching': True,
+    }]
    _API_BASE = 'https://api.atresplayer.com/'

    def _perform_login(self, username, password):
-        self._request_webpage(
-            self._API_BASE + 'login', None, 'Downloading login page')
-
        try:
-            target_url = self._download_json(
-                'https://account.atresmedia.com/api/login', None,
-                'Logging in', headers={
-                    'Content-Type': 'application/x-www-form-urlencoded',
-                }, data=urlencode_postdata({
+            self._download_webpage(
+                'https://account.atresplayer.com/auth/v1/login', None,
+                'Logging in', 'Failed to log in', data=urlencode_postdata({
                    'username': username,
                    'password': password,
-                }))['targetUrl']
+                }))
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
                raise ExtractorError('Invalid username and/or password', expected=True)
            raise

-        self._request_webpage(target_url, None, 'Following Target URL')
-
    def _real_extract(self, url):
        display_id, video_id = self._match_valid_url(url).groups()

+        metadata_url = self._download_json(
+            self._API_BASE + 'client/v1/url', video_id, 'Downloading API endpoint data',
+            query={'href': urllib.parse.urlparse(url).path})['href']
+        metadata = self._download_json(metadata_url, video_id)
+
        try:
-            episode = self._download_json(
-                self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
+            video_data = self._download_json(metadata['urlVideo'], video_id, 'Downloading video data')
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                error = self._parse_json(e.cause.response.read(), None)
@@ -67,37 +108,45 @@ class AtresPlayerIE(InfoExtractor):
                raise ExtractorError(error['error_description'], expected=True)
            raise

-        title = episode['titulo']
-
        formats = []
        subtitles = {}
-        for source in episode.get('sources', []):
-            src = source.get('src')
-            if not src:
-                continue
+        for source in traverse_obj(video_data, ('sources', lambda _, v: url_or_none(v['src']))):
+            src_url = source['src']
            src_type = source.get('type')
-            if src_type == 'application/vnd.apple.mpegurl':
-                formats, subtitles = self._extract_m3u8_formats(
-                    src, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id='hls', fatal=False)
-            elif src_type == 'application/dash+xml':
-                formats, subtitles = self._extract_mpd_formats(
-                    src, video_id, mpd_id='dash', fatal=False)
-
-        heartbeat = episode.get('heartbeat') or {}
-        omniture = episode.get('omniture') or {}
-        get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
+            if src_type in ('application/vnd.apple.mpegurl', 'application/hls+legacy', 'application/hls+hevc'):
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                    src_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+            elif src_type in ('application/dash+xml', 'application/dash+hevc'):
+                fmts, subs = self._extract_mpd_formats_and_subtitles(
+                    src_url, video_id, mpd_id='dash', fatal=False)
+            else:
+                continue
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)

        return {
            'display_id': display_id,
            'id': video_id,
-            'title': title,
-            'description': episode.get('descripcion'),
-            'thumbnail': episode.get('imgPoster'),
-            'duration': int_or_none(episode.get('duration')),
            'formats': formats,
-            'channel': get_meta('channel'),
-            'season': get_meta('season'),
-            'episode_number': int_or_none(get_meta('episodeNumber')),
            'subtitles': subtitles,
+            **traverse_obj(video_data, {
+                'title': ('titulo', {str}),
+                'description': ('descripcion', {str}),
+                'duration': ('duration', {int_or_none}),
+                'thumbnail': ('imgPoster', {url_or_none}, {lambda v: f'{v}1920x1080.jpg'}),
+                'age_limit': ('ageRating', {parse_age_limit}),
+            }),
+            **traverse_obj(metadata, {
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'duration': ('duration', {int_or_none}),
+                'tags': ('tags', ..., 'title', {str}),
+                'age_limit': ('ageRating', {parse_age_limit}),
+                'series': ('format', 'title', {str}),
+                'season': ('currentSeason', 'title', {str}),
+                'season_number': ('currentSeason', 'seasonNumber', {int_or_none}),
+                'episode_number': ('numberOfEpisode', {int_or_none}),
+                'timestamp': ('publicationDate', {int_or_none(scale=1000)}),
+                'channel': ('channel', 'title', {str}),
+            }),
        }
--- a/yt_dlp/extractor/cda.py
+++ b/yt_dlp/extractor/cda.py
@@ -353,7 +353,7 @@ class CDAIE(InfoExtractor):

 class CDAFolderIE(InfoExtractor):
    _MAX_PAGE_SIZE = 36
-    _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://www.cda.pl/domino264/folder/31188385',
@@ -378,6 +378,9 @@ class CDAFolderIE(InfoExtractor):
                'title': 'TESTY KOSMETYKÓW',
            },
            'playlist_mincount': 139,
+        }, {
+            'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422',
+            'only_matching': True,
        }]

    def _real_extract(self, url):
--- a/yt_dlp/extractor/linkedin.py
+++ b/yt_dlp/extractor/linkedin.py
@@ -82,7 +82,10 @@ class LinkedInLearningBaseIE(LinkedInBaseIE):


 class LinkedInIE(LinkedInBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)'
+    _VALID_URL = [
+        r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)',
+        r'https?://(?:www\.)?linkedin\.com/feed/update/urn:li:activity:(?P<id>\d+)',
+    ]
    _TESTS = [{
        'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
        'info_dict': {
@@ -106,6 +109,9 @@ class LinkedInIE(LinkedInBaseIE):
            'like_count': int,
            'subtitles': 'mincount:1',
        },
+    }, {
+        'url': 'https://www.linkedin.com/feed/update/urn:li:activity:7016901149999955968/?utm_source=share&utm_medium=member_desktop',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
--- a/yt_dlp/extractor/loco.py
+++ b/yt_dlp/extractor/loco.py
@@ -1,5 +1,9 @@
+import json
+import random
+import time
+
 from .common import InfoExtractor
-from ..utils import int_or_none, url_or_none
+from ..utils import int_or_none, jwt_decode_hs256, try_call, url_or_none
 from ..utils.traversal import require, traverse_obj


@@ -55,13 +59,81 @@ class LocoIE(InfoExtractor):
            'upload_date': '20250226',
            'modified_date': '20250226',
        },
+    }, {
+        # Requires video authorization
+        'url': 'https://loco.com/stream/ac854641-ae0f-497c-a8ea-4195f6d8cc53',
+        'md5': '0513edf85c1e65c9521f555f665387d5',
+        'info_dict': {
+            'id': 'ac854641-ae0f-497c-a8ea-4195f6d8cc53',
+            'ext': 'mp4',
+            'title': 'DUAS CONTAS DESAFIANTE, RUSH TOP 1 NO BRASIL!',
+            'description': 'md5:aa77818edd6fe00dd4b6be75cba5f826',
+            'uploader_id': '7Y9JNAZC3Q',
+            'channel': 'ayellol',
+            'channel_follower_count': int,
+            'comment_count': int,
+            'view_count': int,
+            'concurrent_view_count': int,
+            'like_count': int,
+            'duration': 1229,
+            'thumbnail': 'https://static.ivory.getloconow.com/default_thumb/f5aa678b-6d04-45d9-a89a-859af0a8028f.jpg',
+            'tags': ['Gameplay', 'Carry'],
+            'series': 'League of Legends',
+            'timestamp': 1741182253,
+            'upload_date': '20250305',
+            'modified_timestamp': 1741182419,
+            'modified_date': '20250305',
+        },
    }]

+    # From _app.js
+    _CLIENT_ID = 'TlwKp1zmF6eKFpcisn3FyR18WkhcPkZtzwPVEEC3'
+    _CLIENT_SECRET = 'Kp7tYlUN7LXvtcSpwYvIitgYcLparbtsQSe5AdyyCdiEJBP53Vt9J8eB4AsLdChIpcO2BM19RA3HsGtqDJFjWmwoonvMSG3ZQmnS8x1YIM8yl82xMXZGbE3NKiqmgBVU'
+
+    def _is_jwt_expired(self, token):
+        return jwt_decode_hs256(token)['exp'] - time.time() < 300
+
+    def _get_access_token(self, video_id):
+        access_token = try_call(lambda: self._get_cookies('https://loco.com')['access_token'].value)
+        if access_token and not self._is_jwt_expired(access_token):
+            return access_token
+        access_token = traverse_obj(self._download_json(
+            'https://api.getloconow.com/v3/user/device_profile/', video_id,
+            'Downloading access token', fatal=False, data=json.dumps({
+                'platform': 7,
+                'client_id': self._CLIENT_ID,
+                'client_secret': self._CLIENT_SECRET,
+                'model': 'Mozilla',
+                'os_name': 'Win32',
+                'os_ver': '5.0 (Windows)',
+                'app_ver': '5.0 (Windows)',
+            }).encode(), headers={
+                'Content-Type': 'application/json;charset=utf-8',
+                'DEVICE-ID': ''.join(random.choices('0123456789abcdef', k=32)) + 'live',
+                'X-APP-LANG': 'en',
+                'X-APP-LOCALE': 'en-US',
+                'X-CLIENT-ID': self._CLIENT_ID,
+                'X-CLIENT-SECRET': self._CLIENT_SECRET,
+                'X-PLATFORM': '7',
+            }), 'access_token')
+        if access_token and not self._is_jwt_expired(access_token):
+            self._set_cookie('.loco.com', 'access_token', access_token)
+            return access_token
+
    def _real_extract(self, url):
        video_type, video_id = self._match_valid_url(url).group('type', 'id')
        webpage = self._download_webpage(url, video_id)
        stream = traverse_obj(self._search_nextjs_data(webpage, video_id), (
-            'props', 'pageProps', ('liveStreamData', 'stream'), {dict}, any, {require('stream info')}))
+            'props', 'pageProps', ('liveStreamData', 'stream', 'liveStream'), {dict}, any, {require('stream info')}))
+
+        if access_token := self._get_access_token(video_id):
+            self._request_webpage(
+                'https://drm.loco.com/v1/streams/playback/', video_id,
+                'Downloading video authorization', fatal=False, headers={
+                    'authorization': access_token,
+                }, query={
+                    'stream_uid': stream['uid'],
+                })

        return {
            'formats': self._extract_m3u8_formats(stream['conf']['hls'], video_id),
--- a/yt_dlp/extractor/manyvids.py
+++ b/yt_dlp/extractor/manyvids.py
@@ -1,31 +1,38 @@
-import re
-
 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
    determine_ext,
-    extract_attributes,
    int_or_none,
-    str_to_int,
+    join_nonempty,
+    parse_count,
+    parse_duration,
+    parse_iso8601,
    url_or_none,
-    urlencode_postdata,
 )
+from ..utils.traversal import traverse_obj


 class ManyVidsIE(InfoExtractor):
-    _WORKING = False
    _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
    _TESTS = [{
        # preview video
-        'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
-        'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
+        'url': 'https://www.manyvids.com/Video/530341/mv-tips-tricks',
+        'md5': '738dc723f7735ee9602f7ea352a6d058',
        'info_dict': {
-            'id': '133957',
+            'id': '530341-preview',
            'ext': 'mp4',
-            'title': 'everthing about me (Preview)',
-            'uploader': 'ellyxxix',
+            'title': 'MV Tips & Tricks (Preview)',
+            'description': r're:I will take you on a tour around .{1313}$',
+            'thumbnail': r're:https://cdn5\.manyvids\.com/php_uploads/video_images/DestinyDiaz/.+\.jpg',
+            'uploader': 'DestinyDiaz',
            'view_count': int,
            'like_count': int,
+            'release_timestamp': 1508419904,
+            'tags': ['AdultSchool', 'BBW', 'SFW', 'TeacherFetish'],
+            'release_date': '20171019',
+            'duration': 3167.0,
        },
+        'expected_warnings': ['Only extracting preview'],
    }, {
        # full video
        'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
@@ -34,129 +41,68 @@ class ManyVidsIE(InfoExtractor):
            'id': '935718',
            'ext': 'mp4',
            'title': 'MY FACE REVEAL',
-            'description': 'md5:ec5901d41808b3746fed90face161612',
+            'description': r're:Today is the day!! I am finally taking off my mask .{445}$',
+            'thumbnail': r're:https://ods\.manyvids\.com/1001061960/3aa5397f2a723ec4597e344df66ab845/screenshots/.+\.jpg',
            'uploader': 'Sarah Calanthe',
            'view_count': int,
            'like_count': int,
+            'release_date': '20181110',
+            'tags': ['EyeContact', 'Interviews', 'MaskFetish', 'MouthFetish', 'Redhead'],
+            'release_timestamp': 1541851200,
+            'duration': 224.0,
        },
    }]
+    _API_BASE = 'https://www.manyvids.com/bff/store/video'

    def _real_extract(self, url):
        video_id = self._match_id(url)
+        video_data = self._download_json(f'{self._API_BASE}/{video_id}/private', video_id)['data']
+        formats, preview_only = [], True

-        real_url = f'https://www.manyvids.com/video/{video_id}/gtm.js'
-        try:
-            webpage = self._download_webpage(real_url, video_id)
-        except Exception:
-            # probably useless fallback
-            webpage = self._download_webpage(url, video_id)
-
-        info = self._search_regex(
-            r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
-            webpage, 'meta details', default='')
-        info = extract_attributes(info)
-
-        player = self._search_regex(
-            r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
-            webpage, 'player details', default='')
-        player = extract_attributes(player)
-
-        video_urls_and_ids = (
-            (info.get('data-meta-video'), 'video'),
-            (player.get('data-video-transcoded'), 'transcoded'),
-            (player.get('data-video-filepath'), 'filepath'),
-            (self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
-        )
-
-        def txt_or_none(s, default=None):
-            return (s.strip() or default) if isinstance(s, str) else default
-
-        uploader = txt_or_none(info.get('data-meta-author'))
-
-        def mung_title(s):
-            if uploader:
-                s = re.sub(rf'^\s*{re.escape(uploader)}\s+[|-]', '', s)
-            return txt_or_none(s)
-
-        title = (
-            mung_title(info.get('data-meta-title'))
-            or self._html_search_regex(
-                (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
-                 r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
-                webpage, 'title', default=None)
-            or self._html_search_meta(
-                'twitter:title', webpage, 'title', fatal=True))
-
-        title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
-
-        if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
-            title += ' (Preview)'
-
-        mv_token = self._search_regex(
-            r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
-            'mv token', default=None, group='value')
-
-        if mv_token:
-            # Sets some cookies
-            self._download_webpage(
-                'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
-                video_id, note='Setting format cookies', fatal=False,
-                data=urlencode_postdata({
-                    'mvtoken': mv_token,
-                    'vid': video_id,
-                }), headers={
-                    'Referer': url,
-                    'X-Requested-With': 'XMLHttpRequest',
-                })
-
-        formats = []
-        for v_url, fmt in video_urls_and_ids:
-            v_url = url_or_none(v_url)
-            if not v_url:
+        for format_id, path in [
+            ('preview', ['teaser', 'filepath']),
+            ('transcoded', ['transcodedFilepath']),
+            ('filepath', ['filepath']),
+        ]:
+            format_url = traverse_obj(video_data, (*path, {url_or_none}))
+            if not format_url:
                continue
-            if determine_ext(v_url) == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    v_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls'))
+            if determine_ext(format_url) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id=format_id))
            else:
                formats.append({
-                    'url': v_url,
-                    'format_id': fmt,
+                    'url': format_url,
+                    'format_id': format_id,
+                    'preference': -10 if format_id == 'preview' else None,
+                    'quality': 10 if format_id == 'filepath' else None,
+                    'height': int_or_none(
+                        self._search_regex(r'_(\d{2,3}[02468])_', format_url, 'height', default=None)),
                })
+            if format_id != 'preview':
+                preview_only = False

-        self._remove_duplicate_formats(formats)
+        metadata = traverse_obj(
+            self._download_json(f'{self._API_BASE}/{video_id}', video_id, fatal=False), 'data')
+        title = traverse_obj(metadata, ('title', {clean_html}))

-        for f in formats:
-            if f.get('height') is None:
-                f['height'] = int_or_none(
-                    self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
-            if '/preview/' in f['url']:
-                f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
-                f['preference'] = -10
-            if 'transcoded' in f['format_id']:
-                f['preference'] = f.get('preference', -1) - 1
-
-        def get_likes():
-            likes = self._search_regex(
-                rf'''(<a\b[^>]*\bdata-id\s*=\s*(['"]){video_id}\2[^>]*>)''',
-                webpage, 'likes', default='')
-            likes = extract_attributes(likes)
-            return int_or_none(likes.get('data-likes'))
-
-        def get_views():
-            return str_to_int(self._html_search_regex(
-                r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
-                webpage, 'view count', default=None))
+        if preview_only:
+            title = join_nonempty(title, '(Preview)', delim=' ')
+            video_id += '-preview'
+            self.report_warning(
+                f'Only extracting preview. Video may be paid or subscription only. {self._login_hint()}')

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
-            'description': txt_or_none(info.get('data-meta-description')),
-            'uploader': txt_or_none(info.get('data-meta-author')),
-            'thumbnail': (
-                url_or_none(info.get('data-meta-image'))
-                or url_or_none(player.get('data-video-screenshot'))),
-            'view_count': get_views(),
-            'like_count': get_likes(),
+            **traverse_obj(metadata, {
+                'description': ('description', {clean_html}),
+                'uploader': ('model', 'displayName', {clean_html}),
+                'thumbnail': (('screenshot', 'thumbnail'), {url_or_none}, any),
+                'view_count': ('views', {parse_count}),
+                'like_count': ('likes', {parse_count}),
+                'release_timestamp': ('launchDate', {parse_iso8601}),
+                'duration': ('videoDuration', {parse_duration}),
+                'tags': ('tagList', ..., 'label', {str}, filter, all, filter),
+            }),
        }
--- a/yt_dlp/extractor/panopto.py
+++ b/yt_dlp/extractor/panopto.py
@@ -14,8 +14,9 @@ from ..utils import (
    int_or_none,
    parse_qs,
    srt_subtitles_timecode,
-    traverse_obj,
+    url_or_none,
 )
+from ..utils.traversal import traverse_obj


 class PanoptoBaseIE(InfoExtractor):
@@ -345,21 +346,16 @@ class PanoptoIE(PanoptoBaseIE):
        subtitles = {}
        for stream in streams or []:
            stream_formats = []
-            http_stream_url = stream.get('StreamHttpUrl')
-            stream_url = stream.get('StreamUrl')
-
-            if http_stream_url:
-                stream_formats.append({'url': http_stream_url})
-
-            if stream_url:
+            for stream_url in set(traverse_obj(stream, (('StreamHttpUrl', 'StreamUrl'), {url_or_none}))):
                media_type = stream.get('ViewerMediaFileTypeName')
                if media_type in ('hls', ):
-                    m3u8_formats, stream_subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id)
-                    stream_formats.extend(m3u8_formats)
-                    subtitles = self._merge_subtitles(subtitles, stream_subtitles)
+                    fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, m3u8_id='hls', fatal=False)
+                    stream_formats.extend(fmts)
+                    self._merge_subtitles(subs, target=subtitles)
                else:
                    stream_formats.append({
                        'url': stream_url,
+                        'ext': media_type,
                    })
            for fmt in stream_formats:
                fmt.update({
--- a/yt_dlp/extractor/rtve.py
+++ b/yt_dlp/extractor/rtve.py
@@ -1,35 +1,142 @@
 import base64
 import io
 import struct
+import urllib.parse

 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
+    clean_html,
    determine_ext,
    float_or_none,
+    make_archive_id,
+    parse_iso8601,
    qualities,
-    remove_end,
-    remove_start,
-    try_get,
+    url_or_none,
 )
+from ..utils.traversal import subs_list_to_dict, traverse_obj


-class RTVEALaCartaIE(InfoExtractor):
+class RTVEBaseIE(InfoExtractor):
+    # Reimplementation of https://js2.rtve.es/pages/app-player/3.5.1/js/pf_video.js
+    @staticmethod
+    def _decrypt_url(png):
+        encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
+        while True:
+            length_data = encrypted_data.read(4)
+            length = struct.unpack('!I', length_data)[0]
+            chunk_type = encrypted_data.read(4)
+            if chunk_type == b'IEND':
+                break
+            data = encrypted_data.read(length)
+            if chunk_type == b'tEXt':
+                data = bytes(filter(None, data))
+                alphabet_data, _, url_data = data.partition(b'#')
+                quality_str, _, url_data = url_data.rpartition(b'%%')
+                quality_str = quality_str.decode() or ''
+                alphabet = RTVEBaseIE._get_alphabet(alphabet_data)
+                url = RTVEBaseIE._get_url(alphabet, url_data)
+                yield quality_str, url
+            encrypted_data.read(4)  # CRC
+
+    @staticmethod
+    def _get_url(alphabet, url_data):
+        url = ''
+        f = 0
+        e = 3
+        b = 1
+        for char in url_data.decode('iso-8859-1'):
+            if f == 0:
+                l = int(char) * 10
+                f = 1
+            else:
+                if e == 0:
+                    l += int(char)
+                    url += alphabet[l]
+                    e = (b + 3) % 4
+                    f = 0
+                    b += 1
+                else:
+                    e -= 1
+        return url
+
+    @staticmethod
+    def _get_alphabet(alphabet_data):
+        alphabet = []
+        e = 0
+        d = 0
+        for char in alphabet_data.decode('iso-8859-1'):
+            if d == 0:
+                alphabet.append(char)
+                d = e = (e + 1) % 4
+            else:
+                d -= 1
+        return alphabet
+
+    def _extract_png_formats_and_subtitles(self, video_id, media_type='videos'):
+        formats, subtitles = [], {}
+        q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
+        for manager in ('rtveplayw', 'default'):
+            png = self._download_webpage(
+                f'http://www.rtve.es/ztnr/movil/thumbnail/{manager}/{media_type}/{video_id}.png',
+                video_id, 'Downloading url information', query={'q': 'v2'}, fatal=False)
+            if not png:
+                continue
+
+            for quality, video_url in self._decrypt_url(png):
+                ext = determine_ext(video_url)
+                if ext == 'm3u8':
+                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                        video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+                    formats.extend(fmts)
+                    self._merge_subtitles(subs, target=subtitles)
+                elif ext == 'mpd':
+                    fmts, subs = self._extract_mpd_formats_and_subtitles(
+                        video_url, video_id, 'dash', fatal=False)
+                    formats.extend(fmts)
+                    self._merge_subtitles(subs, target=subtitles)
+                else:
+                    formats.append({
+                        'format_id': quality,
+                        'quality': q(quality),
+                        'url': video_url,
+                    })
+        return formats, subtitles
+
+    def _parse_metadata(self, metadata):
+        return traverse_obj(metadata, {
+            'title': ('title', {str.strip}),
+            'alt_title': ('alt', {str.strip}),
+            'description': ('description', {clean_html}),
+            'timestamp': ('dateOfEmission', {parse_iso8601(delimiter=' ')}),
+            'release_timestamp': ('publicationDate', {parse_iso8601(delimiter=' ')}),
+            'modified_timestamp': ('modificationDate', {parse_iso8601(delimiter=' ')}),
+            'thumbnail': (('thumbnail', 'image', 'imageSEO'), {url_or_none}, any),
+            'duration': ('duration', {float_or_none(scale=1000)}),
+            'is_live': ('live', {bool}),
+            'series': (('programTitle', ('programInfo', 'title')), {clean_html}, any),
+        })
+
+
+class RTVEALaCartaIE(RTVEBaseIE):
    IE_NAME = 'rtve.es:alacarta'
-    IE_DESC = 'RTVE a la carta'
-    _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
+    IE_DESC = 'RTVE a la carta and Play'
+    _VALID_URL = [
+        r'https?://(?:www\.)?rtve\.es/(?:m/)?(?:(?:alacarta|play)/videos|filmoteca)/(?!directo)(?:[^/?#]+/){2}(?P<id>\d+)',
+        r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/?#]+/video/[^/?#]+/(?P<id>\d+)',
+    ]

    _TESTS = [{
-        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
-        'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
+        'url': 'http://www.rtve.es/alacarta/videos/la-aventura-del-saber/aventuraentornosilla/3088905/',
+        'md5': 'a964547824359a5753aef09d79fe984b',
        'info_dict': {
-            'id': '2491869',
+            'id': '3088905',
            'ext': 'mp4',
-            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
-            'duration': 5024.566,
-            'series': 'Balonmano',
+            'title': 'En torno a la silla',
+            'duration': 1216.981,
+            'series': 'La aventura del Saber',
+            'thumbnail': 'https://img2.rtve.es/v/aventuraentornosilla_3088905.png',
        },
-        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
    }, {
        'note': 'Live stream',
        'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
@ -38,140 +145,88 @@ class RTVEALaCartaIE(InfoExtractor):
            'ext': 'mp4',
            'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
+            'live_status': 'is_live',
+            'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
        },
        'params': {
            'skip_download': 'live stream',
        },
    }, {
        'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
-        'md5': 'd850f3c8731ea53952ebab489cf81cbf',
+        'md5': 'f3cf0d1902d008c48c793e736706c174',
        'info_dict': {
            'id': '4236788',
            'ext': 'mp4',
-            'title': 'Servir y proteger - Capítulo 104',
-            'duration': 3222.0,
+            'title': 'Episodio 104',
+            'duration': 3222.8,
+            'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
+            'series': 'Servir y proteger',
        },
-        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
    }, {
        'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
        'only_matching': True,
    }, {
        'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
        'only_matching': True,
+    }, {
+        'url': 'https://www.rtve.es/play/videos/saber-vivir/07-07-24/16177116/',
+        'md5': 'a5b24fcdfa3ff5cb7908aba53d22d4b6',
+        'info_dict': {
+            'id': '16177116',
+            'ext': 'mp4',
+            'title': 'Saber vivir - 07/07/24',
+            'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
+            'duration': 2162.68,
+            'series': 'Saber vivir',
+        },
+    }, {
+        'url': 'https://www.rtve.es/infantil/serie/agus-lui-churros-crafts/video/gusano/7048976/',
+        'info_dict': {
+            'id': '7048976',
+            'ext': 'mp4',
+            'title': 'Gusano',
+            'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
+            'duration': 292.86,
+            'series': 'Agus & Lui: Churros y Crafts',
+            '_old_archive_ids': ['rtveinfantil 7048976'],
+        },
    }]

-    def _real_initialize(self):
-        user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode()).decode('utf-8')
-        self._manager = self._download_json(
-            'http://www.rtve.es/odin/loki/' + user_agent_b64,
-            None, 'Fetching manager info')['manager']
-
-    @staticmethod
-    def _decrypt_url(png):
-        encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
-        while True:
-            length = struct.unpack('!I', encrypted_data.read(4))[0]
-            chunk_type = encrypted_data.read(4)
-            if chunk_type == b'IEND':
-                break
-            data = encrypted_data.read(length)
-            if chunk_type == b'tEXt':
-                alphabet_data, text = data.split(b'\0')
-                quality, url_data = text.split(b'%%')
-                alphabet = []
-                e = 0
-                d = 0
-                for l in alphabet_data.decode('iso-8859-1'):
-                    if d == 0:
-                        alphabet.append(l)
-                        d = e = (e + 1) % 4
-                    else:
-                        d -= 1
-                url = ''
-                f = 0
-                e = 3
-                b = 1
-                for letter in url_data.decode('iso-8859-1'):
-                    if f == 0:
-                        l = int(letter) * 10
-                        f = 1
-                    else:
-                        if e == 0:
-                            l += int(letter)
-                            url += alphabet[l]
-                            e = (b + 3) % 4
-                            f = 0
-                            b += 1
-                        else:
-                            e -= 1
-
-                yield quality.decode(), url
-            encrypted_data.read(4)  # CRC
-
-    def _extract_png_formats(self, video_id):
-        png = self._download_webpage(
-            f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/videos/{video_id}.png',
-            video_id, 'Downloading url information', query={'q': 'v2'})
-        q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
-        formats = []
-        for quality, video_url in self._decrypt_url(png):
-            ext = determine_ext(video_url)
-            if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    video_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id='hls', fatal=False))
-            elif ext == 'mpd':
-                formats.extend(self._extract_mpd_formats(
-                    video_url, video_id, 'dash', fatal=False))
-            else:
-                formats.append({
-                    'format_id': quality,
-                    'quality': q(quality),
-                    'url': video_url,
-                })
-        return formats
+    def _get_subtitles(self, video_id):
+        subtitle_data = self._download_json(
+            f'https://api2.rtve.es/api/videos/{video_id}/subtitulos.json', video_id,
+            'Downloading subtitles info')
+        return traverse_obj(subtitle_data, ('page', 'items', ..., {
+            'id': ('lang', {str}),
+            'url': ('src', {url_or_none}),
+        }, all, {subs_list_to_dict(lang='es')}))

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        info = self._download_json(
+        metadata = self._download_json(
            f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json',
            video_id)['page']['items'][0]
-        if info['state'] == 'DESPU':
+        if metadata['state'] == 'DESPU':
            raise ExtractorError('The video is no longer available', expected=True)
-        title = info['title'].strip()
-        formats = self._extract_png_formats(video_id)
+        formats, subtitles = self._extract_png_formats_and_subtitles(video_id)

-        subtitles = None
-        sbt_file = info.get('sbtFile')
-        if sbt_file:
-            subtitles = self.extract_subtitles(video_id, sbt_file)
+        self._merge_subtitles(self.extract_subtitles(video_id), target=subtitles)

-        is_live = info.get('live') is True
+        is_infantil = urllib.parse.urlparse(url).path.startswith('/infantil/')

        return {
            'id': video_id,
-            'title': title,
            'formats': formats,
-            'thumbnail': info.get('image'),
            'subtitles': subtitles,
-            'duration': float_or_none(info.get('duration'), 1000),
-            'is_live': is_live,
-            'series': info.get('programTitle'),
+            **self._parse_metadata(metadata),
+            '_old_archive_ids': [make_archive_id('rtveinfantil', video_id)] if is_infantil else None,
        }

-    def _get_subtitles(self, video_id, sub_file):
-        subs = self._download_json(
-            sub_file + '.json', video_id,
-            'Downloading subtitles info')['page']['items']
-        return dict(
-            (s['lang'], [{'ext': 'vtt', 'url': s['src']}])
-            for s in subs)

-
-class RTVEAudioIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
+class RTVEAudioIE(RTVEBaseIE):
    IE_NAME = 'rtve.es:audio'
    IE_DESC = 'RTVE audio'
-    _VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/(?:[^/?#]+/){2}(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/',
@ -180,9 +235,11 @@ class RTVEAudioIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
            'id': '5889192',
            'ext': 'mp3',
            'title': 'Códigos informáticos',
-            'thumbnail': r're:https?://.+/1598856591583.jpg',
+            'alt_title': 'Códigos informáticos - Escuchar ahora',
            'duration': 349.440,
            'series': 'A hombros de gigantes',
+            'description': 'md5:72b0d7c1ca20fd327bdfff7ac0171afb',
+            'thumbnail': 'https://img2.rtve.es/a/palabra-ingeniero-codigos-informaticos-270421_5889192.png',
        },
    }, {
        'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/',
@ -191,9 +248,11 @@ class RTVEAudioIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
            'id': '5791165',
            'ext': 'mp3',
            'title': 'Ignatius Farray',
+            'alt_title': 'En Radio 3 - Ignatius Farray - 13/02/21 - escuchar ahora',
            'thumbnail': r're:https?://.+/1613243011863.jpg',
            'duration': 3559.559,
            'series': 'En Radio 3',
+            'description': 'md5:124aa60b461e0b1724a380bad3bc4040',
        },
    }, {
        'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/',
@ -202,126 +261,101 @@ class RTVEAudioIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
            'id': '6082623',
            'ext': 'mp3',
            'title': 'Capítulo 26 y último: La muerte de Victor',
+            'alt_title': 'Frankenstein o el moderno Prometeo - Capítulo 26 y último: La muerte de Victor',
            'thumbnail': r're:https?://.+/1632147445707.jpg',
            'duration': 3174.086,
            'series': 'Frankenstein o el moderno Prometeo',
+            'description': 'md5:4ee6fcb82ebe2e46d267e1d1c1a8f7b5',
        },
    }]

-    def _extract_png_formats(self, audio_id):
-        """
-        This function retrieves media related png thumbnail which obfuscate
-        valuable information about the media. This information is decrypted
-        via base class _decrypt_url function providing media quality and
-        media url
-        """
-        png = self._download_webpage(
-            f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/audios/{audio_id}.png',
-            audio_id, 'Downloading url information', query={'q': 'v2'})
-        q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
-        formats = []
-        for quality, audio_url in self._decrypt_url(png):
-            ext = determine_ext(audio_url)
-            if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    audio_url, audio_id, 'mp4', 'm3u8_native',
-                    m3u8_id='hls', fatal=False))
-            elif ext == 'mpd':
-                formats.extend(self._extract_mpd_formats(
-                    audio_url, audio_id, 'dash', fatal=False))
-            else:
-                formats.append({
-                    'format_id': quality,
-                    'quality': q(quality),
-                    'url': audio_url,
-                })
-        return formats
-
    def _real_extract(self, url):
        audio_id = self._match_id(url)
-        info = self._download_json(
-            f'https://www.rtve.es/api/audios/{audio_id}.json',
-            audio_id)['page']['items'][0]
+        metadata = self._download_json(
+            f'https://www.rtve.es/api/audios/{audio_id}.json', audio_id)['page']['items'][0]
+
+        formats, subtitles = self._extract_png_formats_and_subtitles(audio_id, media_type='audios')

        return {
            'id': audio_id,
-            'title': info['title'].strip(),
-            'thumbnail': info.get('thumbnail'),
-            'duration': float_or_none(info.get('duration'), 1000),
-            'series': try_get(info, lambda x: x['programInfo']['title']),
-            'formats': self._extract_png_formats(audio_id),
+            'formats': formats,
+            'subtitles': subtitles,
+            **self._parse_metadata(metadata),
        }


-class RTVEInfantilIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
-    IE_NAME = 'rtve.es:infantil'
-    IE_DESC = 'RTVE infantil'
-    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
-
-    _TESTS = [{
-        'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
-        'md5': '5747454717aedf9f9fdf212d1bcfc48d',
-        'info_dict': {
-            'id': '3040283',
-            'ext': 'mp4',
-            'title': 'Maneras de vivir',
-            'thumbnail': r're:https?://.+/1426182947956\.JPG',
-            'duration': 357.958,
-        },
-        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
-    }]
-
-
-class RTVELiveIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
+class RTVELiveIE(RTVEBaseIE):
    IE_NAME = 'rtve.es:live'
    IE_DESC = 'RTVE.es live streams'
-    _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
+    _VALID_URL = [
+        r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)',
+        r'https?://(?:www\.)?rtve\.es/play/videos/directo/[^/?#]+/(?P<id>[a-zA-Z0-9-]+)',
+    ]

    _TESTS = [{
        'url': 'http://www.rtve.es/directo/la-1/',
        'info_dict': {
            'id': 'la-1',
            'ext': 'mp4',
-            'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'live_status': 'is_live',
+            'title': str,
+            'description': str,
+            'thumbnail': r're:https://img\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
+            'timestamp': int,
+            'upload_date': str,
        },
-        'params': {
-            'skip_download': 'live stream',
+        'params': {'skip_download': 'live stream'},
+    }, {
+        'url': 'https://www.rtve.es/play/videos/directo/deportes/tdp/',
+        'info_dict': {
+            'id': 'tdp',
+            'ext': 'mp4',
+            'live_status': 'is_live',
+            'title': str,
+            'description': str,
+            'thumbnail': r're:https://img2\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
+            'timestamp': int,
+            'upload_date': str,
        },
+        'params': {'skip_download': 'live stream'},
+    }, {
+        'url': 'http://www.rtve.es/play/videos/directo/canales-lineales/la-1/',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-        title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
-        title = remove_start(title, 'Estoy viendo ')

-        vidplayer_id = self._search_regex(
-            (r'playerId=player([0-9]+)',
-             r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
-             r'data-id=["\'](\d+)'),
-            webpage, 'internal video ID')
+        data_setup = self._search_json(
+            r'<div[^>]+class="[^"]*videoPlayer[^"]*"[^>]*data-setup=\'',
+            webpage, 'data_setup', video_id)
+
+        formats, subtitles = self._extract_png_formats_and_subtitles(data_setup['idAsset'])

        return {
            'id': video_id,
-            'title': title,
-            'formats': self._extract_png_formats(vidplayer_id),
+            **self._search_json_ld(webpage, video_id, fatal=False),
+            'title': self._html_extract_title(webpage),
+            'formats': formats,
+            'subtitles': subtitles,
            'is_live': True,
        }


 class RTVETelevisionIE(InfoExtractor):
    IE_NAME = 'rtve.es:television'
-    _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/?#]+/[^/?#]+/(?P<id>\d+).shtml'

    _TEST = {
-        'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
+        'url': 'https://www.rtve.es/television/20091103/video-inedito-del-8o-programa/299020.shtml',
        'info_dict': {
-            'id': '3069778',
+            'id': '572515',
            'ext': 'mp4',
-            'title': 'Documentos TV - La revolución del móvil',
-            'duration': 3496.948,
+            'title': 'Clase inédita',
+            'duration': 335.817,
+            'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
+            'series': 'El coro de la cárcel',
        },
        'params': {
            'skip_download': True,
@ -332,11 +366,8 @@ class RTVETelevisionIE(InfoExtractor):
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

-        alacarta_url = self._search_regex(
-            r'data-location="alacarta_videos"[^<]+url&quot;:&quot;(http://www\.rtve\.es/alacarta.+?)&',
-            webpage, 'alacarta url', default=None)
-        if alacarta_url is None:
-            raise ExtractorError(
-                'The webpage doesn\'t contain any video', expected=True)
+        play_url = self._html_search_meta('contentUrl', webpage)
+        if play_url is None:
+            raise ExtractorError('The webpage doesn\'t contain any video', expected=True)

-        return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())
+        return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key())
--- a/yt_dlp/extractor/tvp.py
+++ b/yt_dlp/extractor/tvp.py
@ -513,7 +513,7 @@ class TVPVODBaseIE(InfoExtractor):

 class TVPVODVideoIE(TVPVODBaseIE):
    IE_NAME = 'tvp:vod'
-    _VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)/?(?:[?#]|$)'
+    _VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek--?\d+,S-?\d+E-?\d+)?,(?P<id>\d+)/?(?:[?#]|$)'

    _TESTS = [{
        'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
@ -568,6 +568,9 @@ class TVPVODVideoIE(TVPVODBaseIE):
            'live_status': 'is_live',
            'thumbnail': 're:https?://.+',
        },
+    }, {
+        'url': 'https://vod.tvp.pl/informacje-i-publicystyka,205/konskie-2025-debata-przedwyborcza-odcinki,2028435/odcinek--1,S01E-1,2028419',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
--- a/yt_dlp/extractor/tvw.py
+++ b/yt_dlp/extractor/tvw.py
@ -1,13 +1,21 @@
 import json

 from .common import InfoExtractor
-from ..utils import clean_html, remove_end, unified_timestamp, url_or_none
-from ..utils.traversal import traverse_obj
+from ..utils import (
+    clean_html,
+    extract_attributes,
+    parse_qs,
+    remove_end,
+    require,
+    unified_timestamp,
+    url_or_none,
+)
+from ..utils.traversal import find_element, traverse_obj


 class TvwIE(InfoExtractor):
+    IE_NAME = 'tvw'
    _VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)'
-
    _TESTS = [{
        'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/',
        'md5': '9ceb94fe2bb7fd726f74f16356825703',
@ -115,3 +123,43 @@ class TvwIE(InfoExtractor):
                'is_live': ('eventStatus', {lambda x: x == 'live'}),
            }),
        }
+
+
+class TvwTvChannelsIE(InfoExtractor):
+    IE_NAME = 'tvw:tvchannels'
+    _VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://tvw.org/tvchannels/air/',
+        'info_dict': {
+            'id': 'air',
+            'ext': 'mp4',
+            'title': r're:TVW Cable Channel Live Stream',
+            'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
+            'live_status': 'is_live',
+        },
+    }, {
+        'url': 'https://tvw.org/tvchannels/tvw2/',
+        'info_dict': {
+            'id': 'tvw2',
+            'ext': 'mp4',
+            'title': r're:TVW-2 Broadcast Channel',
+            'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
+            'live_status': 'is_live',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        m3u8_url = traverse_obj(webpage, (
+            {find_element(id='invintus-persistent-stream-frame', html=True)}, {extract_attributes},
+            'src', {parse_qs}, 'encoder', 0, {json.loads}, 'live247URI', {url_or_none}, {require('stream url')}))
+
+        return {
+            'id': video_id,
+            'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True),
+            'title': remove_end(self._og_search_title(webpage, default=None), ' - TVW'),
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
+            'is_live': True,
+        }
Author	SHA1	Message	Date
sepro	f5a37ea40e	[ie/loco] Fix extractor (#12934); Closes #12930; Authored by: seproDev	2025-04-19 02:02:09 +02:00
Florentin Le Moal	f07ee91c71	[ie/rtve] Rework extractors (#10388); Closes #1346, Closes #5756; Authored by: meGAmeS1, seproDev; Co-authored-by: sepro <sepro@sepr0.com>	2025-04-19 01:47:14 +02:00
fries1234	ed8ad1b4d6	[ie/tvw:tvchannels] Add extractor (#12721); Authored by: fries1234	2025-04-19 01:35:47 +02:00
Florentin Le Moal	839d643253	[ie/AtresPlayer] Rework extractor (#11424); Closes #996, Closes #1165; Authored by: meGAmeS1, seproDev; Co-authored-by: sepro <sepro@sepr0.com>	2025-04-18 22:12:31 +02:00
香芋奶茶	f5736bb35b	[ie/AbemaTV] Fix thumbnail extraction (#12859); Closes #12858; Authored by: Kiritomo	2025-04-18 21:12:27 +02:00
sepro	9d26daa04a	[ie/panopto] Fix formats extraction (#12925); Closes #11042; Authored by: seproDev	2025-04-18 21:09:41 +02:00
sepro	73a26f9ee6	[ie/linkedin] Support feed URLs (#12927); Closes #6104; Authored by: seproDev	2025-04-18 21:08:13 +02:00
sepro	4e69a626cc	[ie/tvp:vod] Improve `_VALID_URL` (#12923); Closes #12917; Authored by: seproDev	2025-04-18 21:05:01 +02:00
pj47x	77aa15e98f	[ie/manyvids] Fix extractor (#10907); Closes #8268; Authored by: pj47x	2025-04-18 18:38:58 +00:00
Michał Walenciak	cb271d445b	[ie/CDAFolder] Extend `_VALID_URL` (#12919); Closes #12918; Authored by: Kicer86, fireattack; Co-authored-by: fireattack <human.peng@gmail.com>	2025-04-18 18:32:38 +00:00