Compare commits

...

10 Commits

Author SHA1 Message Date
sepro
f5a37ea40e
[ie/loco] Fix extractor (#12934)
Closes #12930
Authored by: seproDev
2025-04-19 02:02:09 +02:00
Florentin Le Moal
f07ee91c71
[ie/rtve] Rework extractors (#10388)
Closes #1346, Closes #5756
Authored by: meGAmeS1, seproDev

Co-authored-by: sepro <sepro@sepr0.com>
2025-04-19 01:47:14 +02:00
fries1234
ed8ad1b4d6
[ie/tvw:tvchannels] Add extractor (#12721)
Authored by: fries1234
2025-04-19 01:35:47 +02:00
Florentin Le Moal
839d643253
[ie/AtresPlayer] Rework extractor (#11424)
Closes #996, Closes #1165
Authored by: meGAmeS1, seproDev

Co-authored-by: sepro <sepro@sepr0.com>
2025-04-18 22:12:31 +02:00
香芋奶茶
f5736bb35b
[ie/AbemaTV] Fix thumbnail extraction (#12859)
Closes #12858
Authored by: Kiritomo
2025-04-18 21:12:27 +02:00
sepro
9d26daa04a
[ie/panopto] Fix formats extraction (#12925)
Closes #11042
Authored by: seproDev
2025-04-18 21:09:41 +02:00
sepro
73a26f9ee6
[ie/linkedin] Support feed URLs (#12927)
Closes #6104
Authored by: seproDev
2025-04-18 21:08:13 +02:00
sepro
4e69a626cc
[ie/tvp:vod] Improve _VALID_URL (#12923)
Closes #12917
Authored by: seproDev
2025-04-18 21:05:01 +02:00
pj47x
77aa15e98f
[ie/manyvids] Fix extractor (#10907)
Closes #8268
Authored by: pj47x
2025-04-18 18:38:58 +00:00
Michał Walenciak
cb271d445b
[ie/CDAFolder] Extend _VALID_URL (#12919)
Closes #12918
Authored by: Kicer86, fireattack

Co-authored-by: fireattack <human.peng@gmail.com>
2025-04-18 18:32:38 +00:00
11 changed files with 551 additions and 390 deletions

View File

@ -1783,7 +1783,6 @@ from .rtvcplay import (
from .rtve import (
RTVEALaCartaIE,
RTVEAudioIE,
RTVEInfantilIE,
RTVELiveIE,
RTVETelevisionIE,
)
@ -2237,7 +2236,10 @@ from .tvplay import (
TVPlayIE,
)
from .tvplayer import TVPlayerIE
from .tvw import TvwIE
from .tvw import (
TvwIE,
TvwTvChannelsIE,
)
from .tweakers import TweakersIE
from .twentymin import TwentyMinutenIE
from .twentythreevideo import TwentyThreeVideoIE

View File

@ -21,6 +21,7 @@ from ..utils import (
int_or_none,
time_seconds,
traverse_obj,
update_url,
update_url_query,
)
@ -417,6 +418,10 @@ class AbemaTVIE(AbemaTVBaseIE):
'is_live': is_live,
'availability': availability,
})
if thumbnail := update_url(self._og_search_thumbnail(webpage, default=''), query=None):
info['thumbnails'] = [{'url': thumbnail}]
return info

View File

@ -1,64 +1,105 @@
import urllib.parse
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
parse_age_limit,
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
class AtresPlayerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/(?:[^/?#]+/){4}(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
_NETRC_MACHINE = 'atresplayer'
_TESTS = [
{
'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
'info_dict': {
'id': '5d4aa2c57ed1a88fc715a615',
'ext': 'mp4',
'title': 'Capítulo 7: Asuntos pendientes',
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
'duration': 3413,
},
'skip': 'This video is only available for registered users',
_TESTS = [{
'url': 'https://www.atresplayer.com/lasexta/programas/el-objetivo/clips/mbappe-describe-como-entrenador-a-carlo-ancelotti-sabe-cuando-tiene-que-ser-padre-jefe-amigo-entrenador_67f2dfb2fb6ab0e4c7203849/',
'info_dict': {
'ext': 'mp4',
'id': '67f2dfb2fb6ab0e4c7203849',
'display_id': 'md5:c203f8d4e425ed115ba56a1c6e4b3e6c',
'title': 'Mbappé describe como entrenador a Carlo Ancelotti: "Sabe cuándo tiene que ser padre, jefe, amigo, entrenador..."',
'channel': 'laSexta',
'duration': 31,
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/06/B02DBE1E-D59B-4683-8404-1A9595D15269/1920x1080.jpg',
'tags': ['Entrevista informativa', 'Actualidad', 'Debate informativo', 'Política', 'Economía', 'Sociedad', 'Cara a cara', 'Análisis', 'Más periodismo'],
'series': 'El Objetivo',
'season': 'Temporada 12',
'timestamp': 1743970079,
'upload_date': '20250406',
},
{
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
'only_matching': True,
}, {
'url': 'https://www.atresplayer.com/antena3/programas/el-hormiguero/clips/revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero_67f836baa4a5b0e4147ca59a/',
'info_dict': {
'ext': 'mp4',
'id': '67f836baa4a5b0e4147ca59a',
'display_id': 'revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero',
'title': 'Revive la entrevista completa a Miguel Bosé en El Hormiguero',
'description': 'md5:c6d2b591408d45a7bc2986dfb938eb72',
'channel': 'Antena 3',
'duration': 2556,
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/10/9076395F-F1FD-48BE-9F18-540DBA10EBAD/1920x1080.jpg',
'tags': ['Entrevista', 'Variedades', 'Humor', 'Entretenimiento', 'Te sigo', 'Buen rollo', 'Cara a cara'],
'series': 'El Hormiguero ',
'season': 'Temporada 14',
'timestamp': 1744320111,
'upload_date': '20250410',
},
{
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
'only_matching': True,
}, {
'url': 'https://www.atresplayer.com/flooxer/series/biara-proyecto-lazarus/temporada-1/capitulo-3-supervivientes_67a6038b64ceca00070f4f69/',
'info_dict': {
'ext': 'mp4',
'id': '67a6038b64ceca00070f4f69',
'display_id': 'capitulo-3-supervivientes',
'title': 'Capítulo 3: Supervivientes',
'description': 'md5:65b231f20302f776c2b0dd24594599a1',
'channel': 'Flooxer',
'duration': 1196,
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages01/2025/02/14/17CF90D3-FE67-40C5-A941-7825B3E13992/1920x1080.jpg',
'tags': ['Juvenil', 'Terror', 'Piel de gallina', 'Te sigo', 'Un break', 'Del tirón'],
'series': 'BIARA: Proyecto Lázarus',
'season': 'Temporada 1',
'season_number': 1,
'episode': 'Episode 3',
'episode_number': 3,
'timestamp': 1743095191,
'upload_date': '20250327',
},
]
}, {
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
'only_matching': True,
}, {
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
'only_matching': True,
}]
_API_BASE = 'https://api.atresplayer.com/'
def _perform_login(self, username, password):
self._request_webpage(
self._API_BASE + 'login', None, 'Downloading login page')
try:
target_url = self._download_json(
'https://account.atresmedia.com/api/login', None,
'Logging in', headers={
'Content-Type': 'application/x-www-form-urlencoded',
}, data=urlencode_postdata({
self._download_webpage(
'https://account.atresplayer.com/auth/v1/login', None,
'Logging in', 'Failed to log in', data=urlencode_postdata({
'username': username,
'password': password,
}))['targetUrl']
}))
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
raise ExtractorError('Invalid username and/or password', expected=True)
raise
self._request_webpage(target_url, None, 'Following Target URL')
def _real_extract(self, url):
display_id, video_id = self._match_valid_url(url).groups()
metadata_url = self._download_json(
self._API_BASE + 'client/v1/url', video_id, 'Downloading API endpoint data',
query={'href': urllib.parse.urlparse(url).path})['href']
metadata = self._download_json(metadata_url, video_id)
try:
episode = self._download_json(
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
video_data = self._download_json(metadata['urlVideo'], video_id, 'Downloading video data')
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
error = self._parse_json(e.cause.response.read(), None)
@ -67,37 +108,45 @@ class AtresPlayerIE(InfoExtractor):
raise ExtractorError(error['error_description'], expected=True)
raise
title = episode['titulo']
formats = []
subtitles = {}
for source in episode.get('sources', []):
src = source.get('src')
if not src:
continue
for source in traverse_obj(video_data, ('sources', lambda _, v: url_or_none(v['src']))):
src_url = source['src']
src_type = source.get('type')
if src_type == 'application/vnd.apple.mpegurl':
formats, subtitles = self._extract_m3u8_formats(
src, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
elif src_type == 'application/dash+xml':
formats, subtitles = self._extract_mpd_formats(
src, video_id, mpd_id='dash', fatal=False)
heartbeat = episode.get('heartbeat') or {}
omniture = episode.get('omniture') or {}
get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
if src_type in ('application/vnd.apple.mpegurl', 'application/hls+legacy', 'application/hls+hevc'):
fmts, subs = self._extract_m3u8_formats_and_subtitles(
src_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
elif src_type in ('application/dash+xml', 'application/dash+hevc'):
fmts, subs = self._extract_mpd_formats_and_subtitles(
src_url, video_id, mpd_id='dash', fatal=False)
else:
continue
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
return {
'display_id': display_id,
'id': video_id,
'title': title,
'description': episode.get('descripcion'),
'thumbnail': episode.get('imgPoster'),
'duration': int_or_none(episode.get('duration')),
'formats': formats,
'channel': get_meta('channel'),
'season': get_meta('season'),
'episode_number': int_or_none(get_meta('episodeNumber')),
'subtitles': subtitles,
**traverse_obj(video_data, {
'title': ('titulo', {str}),
'description': ('descripcion', {str}),
'duration': ('duration', {int_or_none}),
'thumbnail': ('imgPoster', {url_or_none}, {lambda v: f'{v}1920x1080.jpg'}),
'age_limit': ('ageRating', {parse_age_limit}),
}),
**traverse_obj(metadata, {
'title': ('title', {str}),
'description': ('description', {str}),
'duration': ('duration', {int_or_none}),
'tags': ('tags', ..., 'title', {str}),
'age_limit': ('ageRating', {parse_age_limit}),
'series': ('format', 'title', {str}),
'season': ('currentSeason', 'title', {str}),
'season_number': ('currentSeason', 'seasonNumber', {int_or_none}),
'episode_number': ('numberOfEpisode', {int_or_none}),
'timestamp': ('publicationDate', {int_or_none(scale=1000)}),
'channel': ('channel', 'title', {str}),
}),
}

View File

@ -353,7 +353,7 @@ class CDAIE(InfoExtractor):
class CDAFolderIE(InfoExtractor):
_MAX_PAGE_SIZE = 36
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)'
_TESTS = [
{
'url': 'https://www.cda.pl/domino264/folder/31188385',
@ -378,6 +378,9 @@ class CDAFolderIE(InfoExtractor):
'title': 'TESTY KOSMETYKÓW',
},
'playlist_mincount': 139,
}, {
'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -82,7 +82,10 @@ class LinkedInLearningBaseIE(LinkedInBaseIE):
class LinkedInIE(LinkedInBaseIE):
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)'
_VALID_URL = [
r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)',
r'https?://(?:www\.)?linkedin\.com/feed/update/urn:li:activity:(?P<id>\d+)',
]
_TESTS = [{
'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
'info_dict': {
@ -106,6 +109,9 @@ class LinkedInIE(LinkedInBaseIE):
'like_count': int,
'subtitles': 'mincount:1',
},
}, {
'url': 'https://www.linkedin.com/feed/update/urn:li:activity:7016901149999955968/?utm_source=share&utm_medium=member_desktop',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -1,5 +1,9 @@
import json
import random
import time
from .common import InfoExtractor
from ..utils import int_or_none, url_or_none
from ..utils import int_or_none, jwt_decode_hs256, try_call, url_or_none
from ..utils.traversal import require, traverse_obj
@ -55,13 +59,81 @@ class LocoIE(InfoExtractor):
'upload_date': '20250226',
'modified_date': '20250226',
},
}, {
# Requires video authorization
'url': 'https://loco.com/stream/ac854641-ae0f-497c-a8ea-4195f6d8cc53',
'md5': '0513edf85c1e65c9521f555f665387d5',
'info_dict': {
'id': 'ac854641-ae0f-497c-a8ea-4195f6d8cc53',
'ext': 'mp4',
'title': 'DUAS CONTAS DESAFIANTE, RUSH TOP 1 NO BRASIL!',
'description': 'md5:aa77818edd6fe00dd4b6be75cba5f826',
'uploader_id': '7Y9JNAZC3Q',
'channel': 'ayellol',
'channel_follower_count': int,
'comment_count': int,
'view_count': int,
'concurrent_view_count': int,
'like_count': int,
'duration': 1229,
'thumbnail': 'https://static.ivory.getloconow.com/default_thumb/f5aa678b-6d04-45d9-a89a-859af0a8028f.jpg',
'tags': ['Gameplay', 'Carry'],
'series': 'League of Legends',
'timestamp': 1741182253,
'upload_date': '20250305',
'modified_timestamp': 1741182419,
'modified_date': '20250305',
},
}]
# From _app.js
_CLIENT_ID = 'TlwKp1zmF6eKFpcisn3FyR18WkhcPkZtzwPVEEC3'
_CLIENT_SECRET = 'Kp7tYlUN7LXvtcSpwYvIitgYcLparbtsQSe5AdyyCdiEJBP53Vt9J8eB4AsLdChIpcO2BM19RA3HsGtqDJFjWmwoonvMSG3ZQmnS8x1YIM8yl82xMXZGbE3NKiqmgBVU'
def _is_jwt_expired(self, token):
return jwt_decode_hs256(token)['exp'] - time.time() < 300
def _get_access_token(self, video_id):
access_token = try_call(lambda: self._get_cookies('https://loco.com')['access_token'].value)
if access_token and not self._is_jwt_expired(access_token):
return access_token
access_token = traverse_obj(self._download_json(
'https://api.getloconow.com/v3/user/device_profile/', video_id,
'Downloading access token', fatal=False, data=json.dumps({
'platform': 7,
'client_id': self._CLIENT_ID,
'client_secret': self._CLIENT_SECRET,
'model': 'Mozilla',
'os_name': 'Win32',
'os_ver': '5.0 (Windows)',
'app_ver': '5.0 (Windows)',
}).encode(), headers={
'Content-Type': 'application/json;charset=utf-8',
'DEVICE-ID': ''.join(random.choices('0123456789abcdef', k=32)) + 'live',
'X-APP-LANG': 'en',
'X-APP-LOCALE': 'en-US',
'X-CLIENT-ID': self._CLIENT_ID,
'X-CLIENT-SECRET': self._CLIENT_SECRET,
'X-PLATFORM': '7',
}), 'access_token')
if access_token and not self._is_jwt_expired(access_token):
self._set_cookie('.loco.com', 'access_token', access_token)
return access_token
def _real_extract(self, url):
video_type, video_id = self._match_valid_url(url).group('type', 'id')
webpage = self._download_webpage(url, video_id)
stream = traverse_obj(self._search_nextjs_data(webpage, video_id), (
'props', 'pageProps', ('liveStreamData', 'stream'), {dict}, any, {require('stream info')}))
'props', 'pageProps', ('liveStreamData', 'stream', 'liveStream'), {dict}, any, {require('stream info')}))
if access_token := self._get_access_token(video_id):
self._request_webpage(
'https://drm.loco.com/v1/streams/playback/', video_id,
'Downloading video authorization', fatal=False, headers={
'authorization': access_token,
}, query={
'stream_uid': stream['uid'],
})
return {
'formats': self._extract_m3u8_formats(stream['conf']['hls'], video_id),

View File

@ -1,31 +1,38 @@
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
extract_attributes,
int_or_none,
str_to_int,
join_nonempty,
parse_count,
parse_duration,
parse_iso8601,
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
class ManyVidsIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
_TESTS = [{
# preview video
'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
'url': 'https://www.manyvids.com/Video/530341/mv-tips-tricks',
'md5': '738dc723f7735ee9602f7ea352a6d058',
'info_dict': {
'id': '133957',
'id': '530341-preview',
'ext': 'mp4',
'title': 'everthing about me (Preview)',
'uploader': 'ellyxxix',
'title': 'MV Tips & Tricks (Preview)',
'description': r're:I will take you on a tour around .{1313}$',
'thumbnail': r're:https://cdn5\.manyvids\.com/php_uploads/video_images/DestinyDiaz/.+\.jpg',
'uploader': 'DestinyDiaz',
'view_count': int,
'like_count': int,
'release_timestamp': 1508419904,
'tags': ['AdultSchool', 'BBW', 'SFW', 'TeacherFetish'],
'release_date': '20171019',
'duration': 3167.0,
},
'expected_warnings': ['Only extracting preview'],
}, {
# full video
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
@ -34,129 +41,68 @@ class ManyVidsIE(InfoExtractor):
'id': '935718',
'ext': 'mp4',
'title': 'MY FACE REVEAL',
'description': 'md5:ec5901d41808b3746fed90face161612',
'description': r're:Today is the day!! I am finally taking off my mask .{445}$',
'thumbnail': r're:https://ods\.manyvids\.com/1001061960/3aa5397f2a723ec4597e344df66ab845/screenshots/.+\.jpg',
'uploader': 'Sarah Calanthe',
'view_count': int,
'like_count': int,
'release_date': '20181110',
'tags': ['EyeContact', 'Interviews', 'MaskFetish', 'MouthFetish', 'Redhead'],
'release_timestamp': 1541851200,
'duration': 224.0,
},
}]
_API_BASE = 'https://www.manyvids.com/bff/store/video'
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(f'{self._API_BASE}/{video_id}/private', video_id)['data']
formats, preview_only = [], True
real_url = f'https://www.manyvids.com/video/{video_id}/gtm.js'
try:
webpage = self._download_webpage(real_url, video_id)
except Exception:
# probably useless fallback
webpage = self._download_webpage(url, video_id)
info = self._search_regex(
r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
webpage, 'meta details', default='')
info = extract_attributes(info)
player = self._search_regex(
r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
webpage, 'player details', default='')
player = extract_attributes(player)
video_urls_and_ids = (
(info.get('data-meta-video'), 'video'),
(player.get('data-video-transcoded'), 'transcoded'),
(player.get('data-video-filepath'), 'filepath'),
(self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
)
def txt_or_none(s, default=None):
return (s.strip() or default) if isinstance(s, str) else default
uploader = txt_or_none(info.get('data-meta-author'))
def mung_title(s):
if uploader:
s = re.sub(rf'^\s*{re.escape(uploader)}\s+[|-]', '', s)
return txt_or_none(s)
title = (
mung_title(info.get('data-meta-title'))
or self._html_search_regex(
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
webpage, 'title', default=None)
or self._html_search_meta(
'twitter:title', webpage, 'title', fatal=True))
title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
title += ' (Preview)'
mv_token = self._search_regex(
r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'mv token', default=None, group='value')
if mv_token:
# Sets some cookies
self._download_webpage(
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
video_id, note='Setting format cookies', fatal=False,
data=urlencode_postdata({
'mvtoken': mv_token,
'vid': video_id,
}), headers={
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
})
formats = []
for v_url, fmt in video_urls_and_ids:
v_url = url_or_none(v_url)
if not v_url:
for format_id, path in [
('preview', ['teaser', 'filepath']),
('transcoded', ['transcodedFilepath']),
('filepath', ['filepath']),
]:
format_url = traverse_obj(video_data, (*path, {url_or_none}))
if not format_url:
continue
if determine_ext(v_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
v_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls'))
if determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id=format_id))
else:
formats.append({
'url': v_url,
'format_id': fmt,
'url': format_url,
'format_id': format_id,
'preference': -10 if format_id == 'preview' else None,
'quality': 10 if format_id == 'filepath' else None,
'height': int_or_none(
self._search_regex(r'_(\d{2,3}[02468])_', format_url, 'height', default=None)),
})
if format_id != 'preview':
preview_only = False
self._remove_duplicate_formats(formats)
metadata = traverse_obj(
self._download_json(f'{self._API_BASE}/{video_id}', video_id, fatal=False), 'data')
title = traverse_obj(metadata, ('title', {clean_html}))
for f in formats:
if f.get('height') is None:
f['height'] = int_or_none(
self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
if '/preview/' in f['url']:
f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
f['preference'] = -10
if 'transcoded' in f['format_id']:
f['preference'] = f.get('preference', -1) - 1
def get_likes():
likes = self._search_regex(
rf'''(<a\b[^>]*\bdata-id\s*=\s*(['"]){video_id}\2[^>]*>)''',
webpage, 'likes', default='')
likes = extract_attributes(likes)
return int_or_none(likes.get('data-likes'))
def get_views():
return str_to_int(self._html_search_regex(
r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
webpage, 'view count', default=None))
if preview_only:
title = join_nonempty(title, '(Preview)', delim=' ')
video_id += '-preview'
self.report_warning(
f'Only extracting preview. Video may be paid or subscription only. {self._login_hint()}')
return {
'id': video_id,
'title': title,
'formats': formats,
'description': txt_or_none(info.get('data-meta-description')),
'uploader': txt_or_none(info.get('data-meta-author')),
'thumbnail': (
url_or_none(info.get('data-meta-image'))
or url_or_none(player.get('data-video-screenshot'))),
'view_count': get_views(),
'like_count': get_likes(),
**traverse_obj(metadata, {
'description': ('description', {clean_html}),
'uploader': ('model', 'displayName', {clean_html}),
'thumbnail': (('screenshot', 'thumbnail'), {url_or_none}, any),
'view_count': ('views', {parse_count}),
'like_count': ('likes', {parse_count}),
'release_timestamp': ('launchDate', {parse_iso8601}),
'duration': ('videoDuration', {parse_duration}),
'tags': ('tagList', ..., 'label', {str}, filter, all, filter),
}),
}

View File

@ -14,8 +14,9 @@ from ..utils import (
int_or_none,
parse_qs,
srt_subtitles_timecode,
traverse_obj,
url_or_none,
)
from ..utils.traversal import traverse_obj
class PanoptoBaseIE(InfoExtractor):
@ -345,21 +346,16 @@ class PanoptoIE(PanoptoBaseIE):
subtitles = {}
for stream in streams or []:
stream_formats = []
http_stream_url = stream.get('StreamHttpUrl')
stream_url = stream.get('StreamUrl')
if http_stream_url:
stream_formats.append({'url': http_stream_url})
if stream_url:
for stream_url in set(traverse_obj(stream, (('StreamHttpUrl', 'StreamUrl'), {url_or_none}))):
media_type = stream.get('ViewerMediaFileTypeName')
if media_type in ('hls', ):
m3u8_formats, stream_subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id)
stream_formats.extend(m3u8_formats)
subtitles = self._merge_subtitles(subtitles, stream_subtitles)
fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, m3u8_id='hls', fatal=False)
stream_formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
stream_formats.append({
'url': stream_url,
'ext': media_type,
})
for fmt in stream_formats:
fmt.update({

View File

@ -1,35 +1,142 @@
import base64
import io
import struct
import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
determine_ext,
float_or_none,
make_archive_id,
parse_iso8601,
qualities,
remove_end,
remove_start,
try_get,
url_or_none,
)
from ..utils.traversal import subs_list_to_dict, traverse_obj
class RTVEALaCartaIE(InfoExtractor):
class RTVEBaseIE(InfoExtractor):
# Reimplementation of https://js2.rtve.es/pages/app-player/3.5.1/js/pf_video.js
@staticmethod
def _decrypt_url(png):
encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
while True:
length_data = encrypted_data.read(4)
length = struct.unpack('!I', length_data)[0]
chunk_type = encrypted_data.read(4)
if chunk_type == b'IEND':
break
data = encrypted_data.read(length)
if chunk_type == b'tEXt':
data = bytes(filter(None, data))
alphabet_data, _, url_data = data.partition(b'#')
quality_str, _, url_data = url_data.rpartition(b'%%')
quality_str = quality_str.decode() or ''
alphabet = RTVEBaseIE._get_alphabet(alphabet_data)
url = RTVEBaseIE._get_url(alphabet, url_data)
yield quality_str, url
encrypted_data.read(4) # CRC
@staticmethod
def _get_url(alphabet, url_data):
url = ''
f = 0
e = 3
b = 1
for char in url_data.decode('iso-8859-1'):
if f == 0:
l = int(char) * 10
f = 1
else:
if e == 0:
l += int(char)
url += alphabet[l]
e = (b + 3) % 4
f = 0
b += 1
else:
e -= 1
return url
@staticmethod
def _get_alphabet(alphabet_data):
alphabet = []
e = 0
d = 0
for char in alphabet_data.decode('iso-8859-1'):
if d == 0:
alphabet.append(char)
d = e = (e + 1) % 4
else:
d -= 1
return alphabet
def _extract_png_formats_and_subtitles(self, video_id, media_type='videos'):
formats, subtitles = [], {}
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
for manager in ('rtveplayw', 'default'):
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{manager}/{media_type}/{video_id}.png',
video_id, 'Downloading url information', query={'q': 'v2'}, fatal=False)
if not png:
continue
for quality, video_url in self._decrypt_url(png):
ext = determine_ext(video_url)
if ext == 'm3u8':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
elif ext == 'mpd':
fmts, subs = self._extract_mpd_formats_and_subtitles(
video_url, video_id, 'dash', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': video_url,
})
return formats, subtitles
def _parse_metadata(self, metadata):
return traverse_obj(metadata, {
'title': ('title', {str.strip}),
'alt_title': ('alt', {str.strip}),
'description': ('description', {clean_html}),
'timestamp': ('dateOfEmission', {parse_iso8601(delimiter=' ')}),
'release_timestamp': ('publicationDate', {parse_iso8601(delimiter=' ')}),
'modified_timestamp': ('modificationDate', {parse_iso8601(delimiter=' ')}),
'thumbnail': (('thumbnail', 'image', 'imageSEO'), {url_or_none}, any),
'duration': ('duration', {float_or_none(scale=1000)}),
'is_live': ('live', {bool}),
'series': (('programTitle', ('programInfo', 'title')), {clean_html}, any),
})
class RTVEALaCartaIE(RTVEBaseIE):
IE_NAME = 'rtve.es:alacarta'
IE_DESC = 'RTVE a la carta'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
IE_DESC = 'RTVE a la carta and Play'
_VALID_URL = [
r'https?://(?:www\.)?rtve\.es/(?:m/)?(?:(?:alacarta|play)/videos|filmoteca)/(?!directo)(?:[^/?#]+/){2}(?P<id>\d+)',
r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/?#]+/video/[^/?#]+/(?P<id>\d+)',
]
_TESTS = [{
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
'url': 'http://www.rtve.es/alacarta/videos/la-aventura-del-saber/aventuraentornosilla/3088905/',
'md5': 'a964547824359a5753aef09d79fe984b',
'info_dict': {
'id': '2491869',
'id': '3088905',
'ext': 'mp4',
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
'duration': 5024.566,
'series': 'Balonmano',
'title': 'En torno a la silla',
'duration': 1216.981,
'series': 'La aventura del Saber',
'thumbnail': 'https://img2.rtve.es/v/aventuraentornosilla_3088905.png',
},
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}, {
'note': 'Live stream',
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
@ -38,140 +145,88 @@ class RTVEALaCartaIE(InfoExtractor):
'ext': 'mp4',
'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'is_live': True,
'live_status': 'is_live',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
},
'params': {
'skip_download': 'live stream',
},
}, {
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
'md5': 'd850f3c8731ea53952ebab489cf81cbf',
'md5': 'f3cf0d1902d008c48c793e736706c174',
'info_dict': {
'id': '4236788',
'ext': 'mp4',
'title': 'Servir y proteger - Capítulo 104',
'duration': 3222.0,
'title': 'Episodio 104',
'duration': 3222.8,
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'series': 'Servir y proteger',
},
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}, {
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
'only_matching': True,
}, {
'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
'only_matching': True,
}, {
'url': 'https://www.rtve.es/play/videos/saber-vivir/07-07-24/16177116/',
'md5': 'a5b24fcdfa3ff5cb7908aba53d22d4b6',
'info_dict': {
'id': '16177116',
'ext': 'mp4',
'title': 'Saber vivir - 07/07/24',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'duration': 2162.68,
'series': 'Saber vivir',
},
}, {
'url': 'https://www.rtve.es/infantil/serie/agus-lui-churros-crafts/video/gusano/7048976/',
'info_dict': {
'id': '7048976',
'ext': 'mp4',
'title': 'Gusano',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'duration': 292.86,
'series': 'Agus & Lui: Churros y Crafts',
'_old_archive_ids': ['rtveinfantil 7048976'],
},
}]
def _real_initialize(self):
user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode()).decode('utf-8')
self._manager = self._download_json(
'http://www.rtve.es/odin/loki/' + user_agent_b64,
None, 'Fetching manager info')['manager']
@staticmethod
def _decrypt_url(png):
encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
while True:
length = struct.unpack('!I', encrypted_data.read(4))[0]
chunk_type = encrypted_data.read(4)
if chunk_type == b'IEND':
break
data = encrypted_data.read(length)
if chunk_type == b'tEXt':
alphabet_data, text = data.split(b'\0')
quality, url_data = text.split(b'%%')
alphabet = []
e = 0
d = 0
for l in alphabet_data.decode('iso-8859-1'):
if d == 0:
alphabet.append(l)
d = e = (e + 1) % 4
else:
d -= 1
url = ''
f = 0
e = 3
b = 1
for letter in url_data.decode('iso-8859-1'):
if f == 0:
l = int(letter) * 10
f = 1
else:
if e == 0:
l += int(letter)
url += alphabet[l]
e = (b + 3) % 4
f = 0
b += 1
else:
e -= 1
yield quality.decode(), url
encrypted_data.read(4) # CRC
def _extract_png_formats(self, video_id):
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/videos/{video_id}.png',
video_id, 'Downloading url information', query={'q': 'v2'})
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
formats = []
for quality, video_url in self._decrypt_url(png):
ext = determine_ext(video_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, 'dash', fatal=False))
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': video_url,
})
return formats
def _get_subtitles(self, video_id):
subtitle_data = self._download_json(
f'https://api2.rtve.es/api/videos/{video_id}/subtitulos.json', video_id,
'Downloading subtitles info')
return traverse_obj(subtitle_data, ('page', 'items', ..., {
'id': ('lang', {str}),
'url': ('src', {url_or_none}),
}, all, {subs_list_to_dict(lang='es')}))
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._download_json(
metadata = self._download_json(
f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json',
video_id)['page']['items'][0]
if info['state'] == 'DESPU':
if metadata['state'] == 'DESPU':
raise ExtractorError('The video is no longer available', expected=True)
title = info['title'].strip()
formats = self._extract_png_formats(video_id)
formats, subtitles = self._extract_png_formats_and_subtitles(video_id)
subtitles = None
sbt_file = info.get('sbtFile')
if sbt_file:
subtitles = self.extract_subtitles(video_id, sbt_file)
self._merge_subtitles(self.extract_subtitles(video_id), target=subtitles)
is_live = info.get('live') is True
is_infantil = urllib.parse.urlparse(url).path.startswith('/infantil/')
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnail': info.get('image'),
'subtitles': subtitles,
'duration': float_or_none(info.get('duration'), 1000),
'is_live': is_live,
'series': info.get('programTitle'),
**self._parse_metadata(metadata),
'_old_archive_ids': [make_archive_id('rtveinfantil', video_id)] if is_infantil else None,
}
def _get_subtitles(self, video_id, sub_file):
subs = self._download_json(
sub_file + '.json', video_id,
'Downloading subtitles info')['page']['items']
return dict(
(s['lang'], [{'ext': 'vtt', 'url': s['src']}])
for s in subs)
class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
class RTVEAudioIE(RTVEBaseIE):
IE_NAME = 'rtve.es:audio'
IE_DESC = 'RTVE audio'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/(?:[^/?#]+/){2}(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/',
@ -180,9 +235,11 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
'id': '5889192',
'ext': 'mp3',
'title': 'Códigos informáticos',
'thumbnail': r're:https?://.+/1598856591583.jpg',
'alt_title': 'Códigos informáticos - Escuchar ahora',
'duration': 349.440,
'series': 'A hombros de gigantes',
'description': 'md5:72b0d7c1ca20fd327bdfff7ac0171afb',
'thumbnail': 'https://img2.rtve.es/a/palabra-ingeniero-codigos-informaticos-270421_5889192.png',
},
}, {
'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/',
@ -191,9 +248,11 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
'id': '5791165',
'ext': 'mp3',
'title': 'Ignatius Farray',
'alt_title': 'En Radio 3 - Ignatius Farray - 13/02/21 - escuchar ahora',
'thumbnail': r're:https?://.+/1613243011863.jpg',
'duration': 3559.559,
'series': 'En Radio 3',
'description': 'md5:124aa60b461e0b1724a380bad3bc4040',
},
}, {
'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/',
@ -202,126 +261,101 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
'id': '6082623',
'ext': 'mp3',
'title': 'Capítulo 26 y último: La muerte de Victor',
'alt_title': 'Frankenstein o el moderno Prometeo - Capítulo 26 y último: La muerte de Victor',
'thumbnail': r're:https?://.+/1632147445707.jpg',
'duration': 3174.086,
'series': 'Frankenstein o el moderno Prometeo',
'description': 'md5:4ee6fcb82ebe2e46d267e1d1c1a8f7b5',
},
}]
def _extract_png_formats(self, audio_id):
"""
This function retrieves media related png thumbnail which obfuscate
valuable information about the media. This information is decrypted
via base class _decrypt_url function providing media quality and
media url
"""
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/audios/{audio_id}.png',
audio_id, 'Downloading url information', query={'q': 'v2'})
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
formats = []
for quality, audio_url in self._decrypt_url(png):
ext = determine_ext(audio_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
audio_url, audio_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
audio_url, audio_id, 'dash', fatal=False))
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': audio_url,
})
return formats
def _real_extract(self, url):
audio_id = self._match_id(url)
info = self._download_json(
f'https://www.rtve.es/api/audios/{audio_id}.json',
audio_id)['page']['items'][0]
metadata = self._download_json(
f'https://www.rtve.es/api/audios/{audio_id}.json', audio_id)['page']['items'][0]
formats, subtitles = self._extract_png_formats_and_subtitles(audio_id, media_type='audios')
return {
'id': audio_id,
'title': info['title'].strip(),
'thumbnail': info.get('thumbnail'),
'duration': float_or_none(info.get('duration'), 1000),
'series': try_get(info, lambda x: x['programInfo']['title']),
'formats': self._extract_png_formats(audio_id),
'formats': formats,
'subtitles': subtitles,
**self._parse_metadata(metadata),
}
class RTVEInfantilIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'rtve.es:infantil'
IE_DESC = 'RTVE infantil'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
_TESTS = [{
'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
'md5': '5747454717aedf9f9fdf212d1bcfc48d',
'info_dict': {
'id': '3040283',
'ext': 'mp4',
'title': 'Maneras de vivir',
'thumbnail': r're:https?://.+/1426182947956\.JPG',
'duration': 357.958,
},
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}]
class RTVELiveIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
class RTVELiveIE(RTVEBaseIE):
IE_NAME = 'rtve.es:live'
IE_DESC = 'RTVE.es live streams'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
_VALID_URL = [
r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)',
r'https?://(?:www\.)?rtve\.es/play/videos/directo/[^/?#]+/(?P<id>[a-zA-Z0-9-]+)',
]
_TESTS = [{
'url': 'http://www.rtve.es/directo/la-1/',
'info_dict': {
'id': 'la-1',
'ext': 'mp4',
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'live_status': 'is_live',
'title': str,
'description': str,
'thumbnail': r're:https://img\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
'timestamp': int,
'upload_date': str,
},
'params': {
'skip_download': 'live stream',
'params': {'skip_download': 'live stream'},
}, {
'url': 'https://www.rtve.es/play/videos/directo/deportes/tdp/',
'info_dict': {
'id': 'tdp',
'ext': 'mp4',
'live_status': 'is_live',
'title': str,
'description': str,
'thumbnail': r're:https://img2\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
'timestamp': int,
'upload_date': str,
},
'params': {'skip_download': 'live stream'},
}, {
'url': 'http://www.rtve.es/play/videos/directo/canales-lineales/la-1/',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
title = remove_start(title, 'Estoy viendo ')
vidplayer_id = self._search_regex(
(r'playerId=player([0-9]+)',
r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
r'data-id=["\'](\d+)'),
webpage, 'internal video ID')
data_setup = self._search_json(
r'<div[^>]+class="[^"]*videoPlayer[^"]*"[^>]*data-setup=\'',
webpage, 'data_setup', video_id)
formats, subtitles = self._extract_png_formats_and_subtitles(data_setup['idAsset'])
return {
'id': video_id,
'title': title,
'formats': self._extract_png_formats(vidplayer_id),
**self._search_json_ld(webpage, video_id, fatal=False),
'title': self._html_extract_title(webpage),
'formats': formats,
'subtitles': subtitles,
'is_live': True,
}
class RTVETelevisionIE(InfoExtractor):
IE_NAME = 'rtve.es:television'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/?#]+/[^/?#]+/(?P<id>\d+).shtml'
_TEST = {
'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
'url': 'https://www.rtve.es/television/20091103/video-inedito-del-8o-programa/299020.shtml',
'info_dict': {
'id': '3069778',
'id': '572515',
'ext': 'mp4',
'title': 'Documentos TV - La revolución del móvil',
'duration': 3496.948,
'title': 'Clase inédita',
'duration': 335.817,
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'series': 'El coro de la cárcel',
},
'params': {
'skip_download': True,
@ -332,11 +366,8 @@ class RTVETelevisionIE(InfoExtractor):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
alacarta_url = self._search_regex(
r'data-location="alacarta_videos"[^<]+url&quot;:&quot;(http://www\.rtve\.es/alacarta.+?)&',
webpage, 'alacarta url', default=None)
if alacarta_url is None:
raise ExtractorError(
'The webpage doesn\'t contain any video', expected=True)
play_url = self._html_search_meta('contentUrl', webpage)
if play_url is None:
raise ExtractorError('The webpage doesn\'t contain any video', expected=True)
return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())
return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key())

View File

@ -513,7 +513,7 @@ class TVPVODBaseIE(InfoExtractor):
class TVPVODVideoIE(TVPVODBaseIE):
IE_NAME = 'tvp:vod'
_VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)/?(?:[?#]|$)'
_VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek--?\d+,S-?\d+E-?\d+)?,(?P<id>\d+)/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
@ -568,6 +568,9 @@ class TVPVODVideoIE(TVPVODBaseIE):
'live_status': 'is_live',
'thumbnail': 're:https?://.+',
},
}, {
'url': 'https://vod.tvp.pl/informacje-i-publicystyka,205/konskie-2025-debata-przedwyborcza-odcinki,2028435/odcinek--1,S01E-1,2028419',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -1,13 +1,21 @@
import json
from .common import InfoExtractor
from ..utils import clean_html, remove_end, unified_timestamp, url_or_none
from ..utils.traversal import traverse_obj
from ..utils import (
clean_html,
extract_attributes,
parse_qs,
remove_end,
require,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import find_element, traverse_obj
class TvwIE(InfoExtractor):
IE_NAME = 'tvw'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/',
'md5': '9ceb94fe2bb7fd726f74f16356825703',
@ -115,3 +123,43 @@ class TvwIE(InfoExtractor):
'is_live': ('eventStatus', {lambda x: x == 'live'}),
}),
}
class TvwTvChannelsIE(InfoExtractor):
IE_NAME = 'tvw:tvchannels'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://tvw.org/tvchannels/air/',
'info_dict': {
'id': 'air',
'ext': 'mp4',
'title': r're:TVW Cable Channel Live Stream',
'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
'live_status': 'is_live',
},
}, {
'url': 'https://tvw.org/tvchannels/tvw2/',
'info_dict': {
'id': 'tvw2',
'ext': 'mp4',
'title': r're:TVW-2 Broadcast Channel',
'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
'live_status': 'is_live',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m3u8_url = traverse_obj(webpage, (
{find_element(id='invintus-persistent-stream-frame', html=True)}, {extract_attributes},
'src', {parse_qs}, 'encoder', 0, {json.loads}, 'live247URI', {url_or_none}, {require('stream url')}))
return {
'id': video_id,
'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True),
'title': remove_end(self._og_search_title(webpage, default=None), ' - TVW'),
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'is_live': True,
}