mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-04-30 16:00:17 +02:00
Compare commits
10 Commits
ceab4d5ed6
...
f5a37ea40e
Author | SHA1 | Date | |
---|---|---|---|
|
f5a37ea40e | ||
|
f07ee91c71 | ||
|
ed8ad1b4d6 | ||
|
839d643253 | ||
|
f5736bb35b | ||
|
9d26daa04a | ||
|
73a26f9ee6 | ||
|
4e69a626cc | ||
|
77aa15e98f | ||
|
cb271d445b |
@ -1783,7 +1783,6 @@ from .rtvcplay import (
|
||||
from .rtve import (
|
||||
RTVEALaCartaIE,
|
||||
RTVEAudioIE,
|
||||
RTVEInfantilIE,
|
||||
RTVELiveIE,
|
||||
RTVETelevisionIE,
|
||||
)
|
||||
@ -2237,7 +2236,10 @@ from .tvplay import (
|
||||
TVPlayIE,
|
||||
)
|
||||
from .tvplayer import TVPlayerIE
|
||||
from .tvw import TvwIE
|
||||
from .tvw import (
|
||||
TvwIE,
|
||||
TvwTvChannelsIE,
|
||||
)
|
||||
from .tweakers import TweakersIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .twentythreevideo import TwentyThreeVideoIE
|
||||
|
@ -21,6 +21,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@ -417,6 +418,10 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
'is_live': is_live,
|
||||
'availability': availability,
|
||||
})
|
||||
|
||||
if thumbnail := update_url(self._og_search_thumbnail(webpage, default=''), query=None):
|
||||
info['thumbnails'] = [{'url': thumbnail}]
|
||||
|
||||
return info
|
||||
|
||||
|
||||
|
@ -1,64 +1,105 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AtresPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/(?:[^/?#]+/){4}(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
|
||||
_NETRC_MACHINE = 'atresplayer'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
|
||||
'info_dict': {
|
||||
'id': '5d4aa2c57ed1a88fc715a615',
|
||||
'ext': 'mp4',
|
||||
'title': 'Capítulo 7: Asuntos pendientes',
|
||||
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
|
||||
'duration': 3413,
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
_TESTS = [{
|
||||
'url': 'https://www.atresplayer.com/lasexta/programas/el-objetivo/clips/mbappe-describe-como-entrenador-a-carlo-ancelotti-sabe-cuando-tiene-que-ser-padre-jefe-amigo-entrenador_67f2dfb2fb6ab0e4c7203849/',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': '67f2dfb2fb6ab0e4c7203849',
|
||||
'display_id': 'md5:c203f8d4e425ed115ba56a1c6e4b3e6c',
|
||||
'title': 'Mbappé describe como entrenador a Carlo Ancelotti: "Sabe cuándo tiene que ser padre, jefe, amigo, entrenador..."',
|
||||
'channel': 'laSexta',
|
||||
'duration': 31,
|
||||
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/06/B02DBE1E-D59B-4683-8404-1A9595D15269/1920x1080.jpg',
|
||||
'tags': ['Entrevista informativa', 'Actualidad', 'Debate informativo', 'Política', 'Economía', 'Sociedad', 'Cara a cara', 'Análisis', 'Más periodismo'],
|
||||
'series': 'El Objetivo',
|
||||
'season': 'Temporada 12',
|
||||
'timestamp': 1743970079,
|
||||
'upload_date': '20250406',
|
||||
},
|
||||
{
|
||||
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.atresplayer.com/antena3/programas/el-hormiguero/clips/revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero_67f836baa4a5b0e4147ca59a/',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': '67f836baa4a5b0e4147ca59a',
|
||||
'display_id': 'revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero',
|
||||
'title': 'Revive la entrevista completa a Miguel Bosé en El Hormiguero',
|
||||
'description': 'md5:c6d2b591408d45a7bc2986dfb938eb72',
|
||||
'channel': 'Antena 3',
|
||||
'duration': 2556,
|
||||
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/10/9076395F-F1FD-48BE-9F18-540DBA10EBAD/1920x1080.jpg',
|
||||
'tags': ['Entrevista', 'Variedades', 'Humor', 'Entretenimiento', 'Te sigo', 'Buen rollo', 'Cara a cara'],
|
||||
'series': 'El Hormiguero ',
|
||||
'season': 'Temporada 14',
|
||||
'timestamp': 1744320111,
|
||||
'upload_date': '20250410',
|
||||
},
|
||||
{
|
||||
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.atresplayer.com/flooxer/series/biara-proyecto-lazarus/temporada-1/capitulo-3-supervivientes_67a6038b64ceca00070f4f69/',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': '67a6038b64ceca00070f4f69',
|
||||
'display_id': 'capitulo-3-supervivientes',
|
||||
'title': 'Capítulo 3: Supervivientes',
|
||||
'description': 'md5:65b231f20302f776c2b0dd24594599a1',
|
||||
'channel': 'Flooxer',
|
||||
'duration': 1196,
|
||||
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages01/2025/02/14/17CF90D3-FE67-40C5-A941-7825B3E13992/1920x1080.jpg',
|
||||
'tags': ['Juvenil', 'Terror', 'Piel de gallina', 'Te sigo', 'Un break', 'Del tirón'],
|
||||
'series': 'BIARA: Proyecto Lázarus',
|
||||
'season': 'Temporada 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 3',
|
||||
'episode_number': 3,
|
||||
'timestamp': 1743095191,
|
||||
'upload_date': '20250327',
|
||||
},
|
||||
]
|
||||
}, {
|
||||
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._request_webpage(
|
||||
self._API_BASE + 'login', None, 'Downloading login page')
|
||||
|
||||
try:
|
||||
target_url = self._download_json(
|
||||
'https://account.atresmedia.com/api/login', None,
|
||||
'Logging in', headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
}, data=urlencode_postdata({
|
||||
self._download_webpage(
|
||||
'https://account.atresplayer.com/auth/v1/login', None,
|
||||
'Logging in', 'Failed to log in', data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': password,
|
||||
}))['targetUrl']
|
||||
}))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
raise ExtractorError('Invalid username and/or password', expected=True)
|
||||
raise
|
||||
|
||||
self._request_webpage(target_url, None, 'Following Target URL')
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
metadata_url = self._download_json(
|
||||
self._API_BASE + 'client/v1/url', video_id, 'Downloading API endpoint data',
|
||||
query={'href': urllib.parse.urlparse(url).path})['href']
|
||||
metadata = self._download_json(metadata_url, video_id)
|
||||
|
||||
try:
|
||||
episode = self._download_json(
|
||||
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
|
||||
video_data = self._download_json(metadata['urlVideo'], video_id, 'Downloading video data')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
@ -67,37 +108,45 @@ class AtresPlayerIE(InfoExtractor):
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
title = episode['titulo']
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for source in episode.get('sources', []):
|
||||
src = source.get('src')
|
||||
if not src:
|
||||
continue
|
||||
for source in traverse_obj(video_data, ('sources', lambda _, v: url_or_none(v['src']))):
|
||||
src_url = source['src']
|
||||
src_type = source.get('type')
|
||||
if src_type == 'application/vnd.apple.mpegurl':
|
||||
formats, subtitles = self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
elif src_type == 'application/dash+xml':
|
||||
formats, subtitles = self._extract_mpd_formats(
|
||||
src, video_id, mpd_id='dash', fatal=False)
|
||||
|
||||
heartbeat = episode.get('heartbeat') or {}
|
||||
omniture = episode.get('omniture') or {}
|
||||
get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
|
||||
if src_type in ('application/vnd.apple.mpegurl', 'application/hls+legacy', 'application/hls+hevc'):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
src_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
elif src_type in ('application/dash+xml', 'application/dash+hevc'):
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
src_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
continue
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'display_id': display_id,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': episode.get('descripcion'),
|
||||
'thumbnail': episode.get('imgPoster'),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'formats': formats,
|
||||
'channel': get_meta('channel'),
|
||||
'season': get_meta('season'),
|
||||
'episode_number': int_or_none(get_meta('episodeNumber')),
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(video_data, {
|
||||
'title': ('titulo', {str}),
|
||||
'description': ('descripcion', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'thumbnail': ('imgPoster', {url_or_none}, {lambda v: f'{v}1920x1080.jpg'}),
|
||||
'age_limit': ('ageRating', {parse_age_limit}),
|
||||
}),
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'tags': ('tags', ..., 'title', {str}),
|
||||
'age_limit': ('ageRating', {parse_age_limit}),
|
||||
'series': ('format', 'title', {str}),
|
||||
'season': ('currentSeason', 'title', {str}),
|
||||
'season_number': ('currentSeason', 'seasonNumber', {int_or_none}),
|
||||
'episode_number': ('numberOfEpisode', {int_or_none}),
|
||||
'timestamp': ('publicationDate', {int_or_none(scale=1000)}),
|
||||
'channel': ('channel', 'title', {str}),
|
||||
}),
|
||||
}
|
||||
|
@ -353,7 +353,7 @@ class CDAIE(InfoExtractor):
|
||||
|
||||
class CDAFolderIE(InfoExtractor):
|
||||
_MAX_PAGE_SIZE = 36
|
||||
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.cda.pl/domino264/folder/31188385',
|
||||
@ -378,6 +378,9 @@ class CDAFolderIE(InfoExtractor):
|
||||
'title': 'TESTY KOSMETYKÓW',
|
||||
},
|
||||
'playlist_mincount': 139,
|
||||
}, {
|
||||
'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -82,7 +82,10 @@ class LinkedInLearningBaseIE(LinkedInBaseIE):
|
||||
|
||||
|
||||
class LinkedInIE(LinkedInBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)'
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)',
|
||||
r'https?://(?:www\.)?linkedin\.com/feed/update/urn:li:activity:(?P<id>\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
|
||||
'info_dict': {
|
||||
@ -106,6 +109,9 @@ class LinkedInIE(LinkedInBaseIE):
|
||||
'like_count': int,
|
||||
'subtitles': 'mincount:1',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.linkedin.com/feed/update/urn:li:activity:7016901149999955968/?utm_source=share&utm_medium=member_desktop',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -1,5 +1,9 @@
|
||||
import json
|
||||
import random
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, url_or_none
|
||||
from ..utils import int_or_none, jwt_decode_hs256, try_call, url_or_none
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
@ -55,13 +59,81 @@ class LocoIE(InfoExtractor):
|
||||
'upload_date': '20250226',
|
||||
'modified_date': '20250226',
|
||||
},
|
||||
}, {
|
||||
# Requires video authorization
|
||||
'url': 'https://loco.com/stream/ac854641-ae0f-497c-a8ea-4195f6d8cc53',
|
||||
'md5': '0513edf85c1e65c9521f555f665387d5',
|
||||
'info_dict': {
|
||||
'id': 'ac854641-ae0f-497c-a8ea-4195f6d8cc53',
|
||||
'ext': 'mp4',
|
||||
'title': 'DUAS CONTAS DESAFIANTE, RUSH TOP 1 NO BRASIL!',
|
||||
'description': 'md5:aa77818edd6fe00dd4b6be75cba5f826',
|
||||
'uploader_id': '7Y9JNAZC3Q',
|
||||
'channel': 'ayellol',
|
||||
'channel_follower_count': int,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'concurrent_view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 1229,
|
||||
'thumbnail': 'https://static.ivory.getloconow.com/default_thumb/f5aa678b-6d04-45d9-a89a-859af0a8028f.jpg',
|
||||
'tags': ['Gameplay', 'Carry'],
|
||||
'series': 'League of Legends',
|
||||
'timestamp': 1741182253,
|
||||
'upload_date': '20250305',
|
||||
'modified_timestamp': 1741182419,
|
||||
'modified_date': '20250305',
|
||||
},
|
||||
}]
|
||||
|
||||
# From _app.js
|
||||
_CLIENT_ID = 'TlwKp1zmF6eKFpcisn3FyR18WkhcPkZtzwPVEEC3'
|
||||
_CLIENT_SECRET = 'Kp7tYlUN7LXvtcSpwYvIitgYcLparbtsQSe5AdyyCdiEJBP53Vt9J8eB4AsLdChIpcO2BM19RA3HsGtqDJFjWmwoonvMSG3ZQmnS8x1YIM8yl82xMXZGbE3NKiqmgBVU'
|
||||
|
||||
def _is_jwt_expired(self, token):
    """Tell whether *token* should be treated as expired.

    The token's ``exp`` claim is compared against the current time; a token
    with fewer than 300 seconds left (or already past ``exp``) counts as
    expired.
    """
    seconds_left = jwt_decode_hs256(token)['exp'] - time.time()
    return seconds_left < 300
|
||||
|
||||
def _get_access_token(self, video_id):
    """Return a usable access token, or None when none can be obtained.

    A token stored in the ``access_token`` cookie for https://loco.com is
    reused while ``_is_jwt_expired`` reports it as still valid. Otherwise a
    new one is requested from the device-profile endpoint (non-fatally) and,
    if valid, stored back into the cookie jar for ``.loco.com``.
    """
    cached_token = try_call(lambda: self._get_cookies('https://loco.com')['access_token'].value)
    if cached_token and not self._is_jwt_expired(cached_token):
        return cached_token

    payload = json.dumps({
        'platform': 7,
        'client_id': self._CLIENT_ID,
        'client_secret': self._CLIENT_SECRET,
        'model': 'Mozilla',
        'os_name': 'Win32',
        'os_ver': '5.0 (Windows)',
        'app_ver': '5.0 (Windows)',
    }).encode()
    request_headers = {
        'Content-Type': 'application/json;charset=utf-8',
        # 32 random hex characters followed by the literal suffix 'live'
        'DEVICE-ID': ''.join(random.choices('0123456789abcdef', k=32)) + 'live',
        'X-APP-LANG': 'en',
        'X-APP-LOCALE': 'en-US',
        'X-CLIENT-ID': self._CLIENT_ID,
        'X-CLIENT-SECRET': self._CLIENT_SECRET,
        'X-PLATFORM': '7',
    }
    response = self._download_json(
        'https://api.getloconow.com/v3/user/device_profile/', video_id,
        'Downloading access token', fatal=False, data=payload, headers=request_headers)

    access_token = traverse_obj(response, 'access_token')
    if access_token and not self._is_jwt_expired(access_token):
        self._set_cookie('.loco.com', 'access_token', access_token)
        return access_token
    return None
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_type, video_id = self._match_valid_url(url).group('type', 'id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
stream = traverse_obj(self._search_nextjs_data(webpage, video_id), (
|
||||
'props', 'pageProps', ('liveStreamData', 'stream'), {dict}, any, {require('stream info')}))
|
||||
'props', 'pageProps', ('liveStreamData', 'stream', 'liveStream'), {dict}, any, {require('stream info')}))
|
||||
|
||||
if access_token := self._get_access_token(video_id):
|
||||
self._request_webpage(
|
||||
'https://drm.loco.com/v1/streams/playback/', video_id,
|
||||
'Downloading video authorization', fatal=False, headers={
|
||||
'authorization': access_token,
|
||||
}, query={
|
||||
'stream_uid': stream['uid'],
|
||||
})
|
||||
|
||||
return {
|
||||
'formats': self._extract_m3u8_formats(stream['conf']['hls'], video_id),
|
||||
|
@ -1,31 +1,38 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
join_nonempty,
|
||||
parse_count,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ManyVidsIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# preview video
|
||||
'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
|
||||
'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
|
||||
'url': 'https://www.manyvids.com/Video/530341/mv-tips-tricks',
|
||||
'md5': '738dc723f7735ee9602f7ea352a6d058',
|
||||
'info_dict': {
|
||||
'id': '133957',
|
||||
'id': '530341-preview',
|
||||
'ext': 'mp4',
|
||||
'title': 'everthing about me (Preview)',
|
||||
'uploader': 'ellyxxix',
|
||||
'title': 'MV Tips & Tricks (Preview)',
|
||||
'description': r're:I will take you on a tour around .{1313}$',
|
||||
'thumbnail': r're:https://cdn5\.manyvids\.com/php_uploads/video_images/DestinyDiaz/.+\.jpg',
|
||||
'uploader': 'DestinyDiaz',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'release_timestamp': 1508419904,
|
||||
'tags': ['AdultSchool', 'BBW', 'SFW', 'TeacherFetish'],
|
||||
'release_date': '20171019',
|
||||
'duration': 3167.0,
|
||||
},
|
||||
'expected_warnings': ['Only extracting preview'],
|
||||
}, {
|
||||
# full video
|
||||
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
|
||||
@ -34,129 +41,68 @@ class ManyVidsIE(InfoExtractor):
|
||||
'id': '935718',
|
||||
'ext': 'mp4',
|
||||
'title': 'MY FACE REVEAL',
|
||||
'description': 'md5:ec5901d41808b3746fed90face161612',
|
||||
'description': r're:Today is the day!! I am finally taking off my mask .{445}$',
|
||||
'thumbnail': r're:https://ods\.manyvids\.com/1001061960/3aa5397f2a723ec4597e344df66ab845/screenshots/.+\.jpg',
|
||||
'uploader': 'Sarah Calanthe',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'release_date': '20181110',
|
||||
'tags': ['EyeContact', 'Interviews', 'MaskFetish', 'MouthFetish', 'Redhead'],
|
||||
'release_timestamp': 1541851200,
|
||||
'duration': 224.0,
|
||||
},
|
||||
}]
|
||||
_API_BASE = 'https://www.manyvids.com/bff/store/video'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(f'{self._API_BASE}/{video_id}/private', video_id)['data']
|
||||
formats, preview_only = [], True
|
||||
|
||||
real_url = f'https://www.manyvids.com/video/{video_id}/gtm.js'
|
||||
try:
|
||||
webpage = self._download_webpage(real_url, video_id)
|
||||
except Exception:
|
||||
# probably useless fallback
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
info = self._search_regex(
|
||||
r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
|
||||
webpage, 'meta details', default='')
|
||||
info = extract_attributes(info)
|
||||
|
||||
player = self._search_regex(
|
||||
r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
|
||||
webpage, 'player details', default='')
|
||||
player = extract_attributes(player)
|
||||
|
||||
video_urls_and_ids = (
|
||||
(info.get('data-meta-video'), 'video'),
|
||||
(player.get('data-video-transcoded'), 'transcoded'),
|
||||
(player.get('data-video-filepath'), 'filepath'),
|
||||
(self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
|
||||
)
|
||||
|
||||
def txt_or_none(s, default=None):
|
||||
return (s.strip() or default) if isinstance(s, str) else default
|
||||
|
||||
uploader = txt_or_none(info.get('data-meta-author'))
|
||||
|
||||
def mung_title(s):
|
||||
if uploader:
|
||||
s = re.sub(rf'^\s*{re.escape(uploader)}\s+[|-]', '', s)
|
||||
return txt_or_none(s)
|
||||
|
||||
title = (
|
||||
mung_title(info.get('data-meta-title'))
|
||||
or self._html_search_regex(
|
||||
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
|
||||
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
|
||||
webpage, 'title', default=None)
|
||||
or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title', fatal=True))
|
||||
|
||||
title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
|
||||
|
||||
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
|
||||
title += ' (Preview)'
|
||||
|
||||
mv_token = self._search_regex(
|
||||
r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'mv token', default=None, group='value')
|
||||
|
||||
if mv_token:
|
||||
# Sets some cookies
|
||||
self._download_webpage(
|
||||
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
|
||||
video_id, note='Setting format cookies', fatal=False,
|
||||
data=urlencode_postdata({
|
||||
'mvtoken': mv_token,
|
||||
'vid': video_id,
|
||||
}), headers={
|
||||
'Referer': url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
})
|
||||
|
||||
formats = []
|
||||
for v_url, fmt in video_urls_and_ids:
|
||||
v_url = url_or_none(v_url)
|
||||
if not v_url:
|
||||
for format_id, path in [
|
||||
('preview', ['teaser', 'filepath']),
|
||||
('transcoded', ['transcodedFilepath']),
|
||||
('filepath', ['filepath']),
|
||||
]:
|
||||
format_url = traverse_obj(video_data, (*path, {url_or_none}))
|
||||
if not format_url:
|
||||
continue
|
||||
if determine_ext(v_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
v_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls'))
|
||||
if determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id=format_id))
|
||||
else:
|
||||
formats.append({
|
||||
'url': v_url,
|
||||
'format_id': fmt,
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
'preference': -10 if format_id == 'preview' else None,
|
||||
'quality': 10 if format_id == 'filepath' else None,
|
||||
'height': int_or_none(
|
||||
self._search_regex(r'_(\d{2,3}[02468])_', format_url, 'height', default=None)),
|
||||
})
|
||||
if format_id != 'preview':
|
||||
preview_only = False
|
||||
|
||||
self._remove_duplicate_formats(formats)
|
||||
metadata = traverse_obj(
|
||||
self._download_json(f'{self._API_BASE}/{video_id}', video_id, fatal=False), 'data')
|
||||
title = traverse_obj(metadata, ('title', {clean_html}))
|
||||
|
||||
for f in formats:
|
||||
if f.get('height') is None:
|
||||
f['height'] = int_or_none(
|
||||
self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
|
||||
if '/preview/' in f['url']:
|
||||
f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
|
||||
f['preference'] = -10
|
||||
if 'transcoded' in f['format_id']:
|
||||
f['preference'] = f.get('preference', -1) - 1
|
||||
|
||||
def get_likes():
|
||||
likes = self._search_regex(
|
||||
rf'''(<a\b[^>]*\bdata-id\s*=\s*(['"]){video_id}\2[^>]*>)''',
|
||||
webpage, 'likes', default='')
|
||||
likes = extract_attributes(likes)
|
||||
return int_or_none(likes.get('data-likes'))
|
||||
|
||||
def get_views():
|
||||
return str_to_int(self._html_search_regex(
|
||||
r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
|
||||
webpage, 'view count', default=None))
|
||||
if preview_only:
|
||||
title = join_nonempty(title, '(Preview)', delim=' ')
|
||||
video_id += '-preview'
|
||||
self.report_warning(
|
||||
f'Only extracting preview. Video may be paid or subscription only. {self._login_hint()}')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': txt_or_none(info.get('data-meta-description')),
|
||||
'uploader': txt_or_none(info.get('data-meta-author')),
|
||||
'thumbnail': (
|
||||
url_or_none(info.get('data-meta-image'))
|
||||
or url_or_none(player.get('data-video-screenshot'))),
|
||||
'view_count': get_views(),
|
||||
'like_count': get_likes(),
|
||||
**traverse_obj(metadata, {
|
||||
'description': ('description', {clean_html}),
|
||||
'uploader': ('model', 'displayName', {clean_html}),
|
||||
'thumbnail': (('screenshot', 'thumbnail'), {url_or_none}, any),
|
||||
'view_count': ('views', {parse_count}),
|
||||
'like_count': ('likes', {parse_count}),
|
||||
'release_timestamp': ('launchDate', {parse_iso8601}),
|
||||
'duration': ('videoDuration', {parse_duration}),
|
||||
'tags': ('tagList', ..., 'label', {str}, filter, all, filter),
|
||||
}),
|
||||
}
|
||||
|
@ -14,8 +14,9 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
srt_subtitles_timecode,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class PanoptoBaseIE(InfoExtractor):
|
||||
@ -345,21 +346,16 @@ class PanoptoIE(PanoptoBaseIE):
|
||||
subtitles = {}
|
||||
for stream in streams or []:
|
||||
stream_formats = []
|
||||
http_stream_url = stream.get('StreamHttpUrl')
|
||||
stream_url = stream.get('StreamUrl')
|
||||
|
||||
if http_stream_url:
|
||||
stream_formats.append({'url': http_stream_url})
|
||||
|
||||
if stream_url:
|
||||
for stream_url in set(traverse_obj(stream, (('StreamHttpUrl', 'StreamUrl'), {url_or_none}))):
|
||||
media_type = stream.get('ViewerMediaFileTypeName')
|
||||
if media_type in ('hls', ):
|
||||
m3u8_formats, stream_subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id)
|
||||
stream_formats.extend(m3u8_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, stream_subtitles)
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, m3u8_id='hls', fatal=False)
|
||||
stream_formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
stream_formats.append({
|
||||
'url': stream_url,
|
||||
'ext': media_type,
|
||||
})
|
||||
for fmt in stream_formats:
|
||||
fmt.update({
|
||||
|
@ -1,35 +1,142 @@
|
||||
import base64
|
||||
import io
|
||||
import struct
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
make_archive_id,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
remove_end,
|
||||
remove_start,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import subs_list_to_dict, traverse_obj
|
||||
|
||||
|
||||
class RTVEALaCartaIE(InfoExtractor):
|
||||
class RTVEBaseIE(InfoExtractor):
|
||||
# Reimplementation of https://js2.rtve.es/pages/app-player/3.5.1/js/pf_video.js
|
||||
@staticmethod
def _decrypt_url(png):
    """Yield ``(quality, url)`` pairs encoded in the ``tEXt`` chunks of a PNG.

    :param png: base64-encoded PNG data (``str`` or ``bytes``).
    :return: generator of ``(quality_str, url)`` tuples, one per ``tEXt``
        chunk; ``quality_str`` is empty when the chunk has no ``%%`` marker.

    Iteration ends at the ``IEND`` chunk, or as soon as the data runs out
    (truncated image / missing ``IEND``) rather than raising ``struct.error``.
    """
    # Drop the first 8 bytes (PNG signature). The remainder is walked as
    # <4-byte big-endian length><4-byte type><data><4-byte CRC> records.
    encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
    while True:
        length_data = encrypted_data.read(4)
        if len(length_data) < 4:
            # Ran out of data before IEND: stop cleanly instead of handing
            # a short buffer to struct.unpack()
            break
        length = struct.unpack('!I', length_data)[0]
        chunk_type = encrypted_data.read(4)
        if chunk_type == b'IEND':
            break
        data = encrypted_data.read(length)
        if chunk_type == b'tEXt':
            # Strip NUL bytes, then split into "<alphabet>#<quality>%%<url data>"
            data = bytes(filter(None, data))
            alphabet_data, _, url_data = data.partition(b'#')
            quality_str, _, url_data = url_data.rpartition(b'%%')
            quality_str = quality_str.decode()
            alphabet = RTVEBaseIE._get_alphabet(alphabet_data)
            url = RTVEBaseIE._get_url(alphabet, url_data)
            yield quality_str, url
        encrypted_data.read(4)  # CRC
|
||||
|
||||
@staticmethod
def _get_url(alphabet, url_data):
    """Decode *url_data* into a string by looking up indices in *alphabet*.

    Each output character is ``alphabet[tens * 10 + units]``. The tens digit
    comes first, then a variable number of filler characters is skipped
    (3 for the first character, afterwards ``(n + 3) % 4`` where ``n`` is the
    count of characters decoded so far), then the units digit.

    :param alphabet: indexable sequence of characters.
    :param url_data: ``bytes`` holding the digit stream.
    :return: the decoded string.
    """
    decoded = []
    awaiting_tens = True
    fillers_left = 3
    decoded_count = 1
    index = 0
    for symbol in url_data.decode('iso-8859-1'):
        if awaiting_tens:
            index = int(symbol) * 10
            awaiting_tens = False
            continue
        if fillers_left:
            # Filler character between the tens and units digits
            fillers_left -= 1
            continue
        index += int(symbol)
        decoded.append(alphabet[index])
        fillers_left = (decoded_count + 3) % 4
        awaiting_tens = True
        decoded_count += 1
    return ''.join(decoded)
|
||||
|
||||
@staticmethod
def _get_alphabet(alphabet_data):
    """Pick the alphabet characters out of *alphabet_data*.

    A character is kept, then a run of characters is skipped; the run length
    cycles 1, 2, 3, 0, 1, ... after each kept character.

    :param alphabet_data: ``bytes`` holding the padded alphabet.
    :return: list of the kept single-character strings.
    """
    kept = []
    run_length = 0
    to_skip = 0
    for symbol in alphabet_data.decode('iso-8859-1'):
        if to_skip:
            to_skip -= 1
            continue
        kept.append(symbol)
        run_length = (run_length + 1) % 4
        to_skip = run_length
    return kept
|
||||
|
||||
def _extract_png_formats_and_subtitles(self, video_id, media_type='videos'):
    """Collect formats and subtitles from the PNG-wrapped URL lists for *video_id*.

    The thumbnail PNG is requested (non-fatally) for each of the
    ``rtveplayw`` and ``default`` managers. Every URL yielded by
    ``_decrypt_url`` is expanded as an HLS or DASH manifest when its
    extension is ``m3u8`` or ``mpd``, and added as a direct format otherwise.

    :param video_id: ID used in the request URL and for reporting.
    :param media_type: path segment of the request URL (default ``'videos'``).
    :return: tuple ``(formats, subtitles)``.
    """
    formats, subtitles = [], {}
    quality_rank = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
    for manager in ('rtveplayw', 'default'):
        png = self._download_webpage(
            f'http://www.rtve.es/ztnr/movil/thumbnail/{manager}/{media_type}/{video_id}.png',
            video_id, 'Downloading url information', query={'q': 'v2'}, fatal=False)
        if not png:
            continue

        for quality, video_url in self._decrypt_url(png):
            ext = determine_ext(video_url)
            if ext not in ('m3u8', 'mpd'):
                # Direct media URL: rank it by its quality label
                formats.append({
                    'format_id': quality,
                    'quality': quality_rank(quality),
                    'url': video_url,
                })
                continue

            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
            else:
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    video_url, video_id, 'dash', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)
    return formats, subtitles
|
||||
|
||||
def _parse_metadata(self, metadata):
    """Map fields of an RTVE *metadata* object onto info-dict keys.

    :param metadata: metadata mapping as passed in by the caller.
    :return: the result of ``traverse_obj`` for the field map below.
    """
    # All three date fields are parsed with a space as the date/time delimiter
    parse_datetime = parse_iso8601(delimiter=' ')
    field_map = {
        'title': ('title', {str.strip}),
        'alt_title': ('alt', {str.strip}),
        'description': ('description', {clean_html}),
        'timestamp': ('dateOfEmission', {parse_datetime}),
        'release_timestamp': ('publicationDate', {parse_datetime}),
        'modified_timestamp': ('modificationDate', {parse_datetime}),
        # First of these keys that holds a valid URL
        'thumbnail': (('thumbnail', 'image', 'imageSEO'), {url_or_none}, any),
        # scale=1000: presumably the source value is in milliseconds -- TODO confirm
        'duration': ('duration', {float_or_none(scale=1000)}),
        'is_live': ('live', {bool}),
        'series': (('programTitle', ('programInfo', 'title')), {clean_html}, any),
    }
    return traverse_obj(metadata, field_map)
|
||||
|
||||
|
||||
class RTVEALaCartaIE(RTVEBaseIE):
|
||||
IE_NAME = 'rtve.es:alacarta'
|
||||
IE_DESC = 'RTVE a la carta'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
IE_DESC = 'RTVE a la carta and Play'
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?rtve\.es/(?:m/)?(?:(?:alacarta|play)/videos|filmoteca)/(?!directo)(?:[^/?#]+/){2}(?P<id>\d+)',
|
||||
r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/?#]+/video/[^/?#]+/(?P<id>\d+)',
|
||||
]
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
|
||||
'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
|
||||
'url': 'http://www.rtve.es/alacarta/videos/la-aventura-del-saber/aventuraentornosilla/3088905/',
|
||||
'md5': 'a964547824359a5753aef09d79fe984b',
|
||||
'info_dict': {
|
||||
'id': '2491869',
|
||||
'id': '3088905',
|
||||
'ext': 'mp4',
|
||||
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
||||
'duration': 5024.566,
|
||||
'series': 'Balonmano',
|
||||
'title': 'En torno a la silla',
|
||||
'duration': 1216.981,
|
||||
'series': 'La aventura del Saber',
|
||||
'thumbnail': 'https://img2.rtve.es/v/aventuraentornosilla_3088905.png',
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||
}, {
|
||||
'note': 'Live stream',
|
||||
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
|
||||
@ -38,140 +145,88 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'is_live': True,
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'live stream',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
|
||||
'md5': 'd850f3c8731ea53952ebab489cf81cbf',
|
||||
'md5': 'f3cf0d1902d008c48c793e736706c174',
|
||||
'info_dict': {
|
||||
'id': '4236788',
|
||||
'ext': 'mp4',
|
||||
'title': 'Servir y proteger - Capítulo 104',
|
||||
'duration': 3222.0,
|
||||
'title': 'Episodio 104',
|
||||
'duration': 3222.8,
|
||||
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
|
||||
'series': 'Servir y proteger',
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.rtve.es/play/videos/saber-vivir/07-07-24/16177116/',
|
||||
'md5': 'a5b24fcdfa3ff5cb7908aba53d22d4b6',
|
||||
'info_dict': {
|
||||
'id': '16177116',
|
||||
'ext': 'mp4',
|
||||
'title': 'Saber vivir - 07/07/24',
|
||||
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
|
||||
'duration': 2162.68,
|
||||
'series': 'Saber vivir',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.rtve.es/infantil/serie/agus-lui-churros-crafts/video/gusano/7048976/',
|
||||
'info_dict': {
|
||||
'id': '7048976',
|
||||
'ext': 'mp4',
|
||||
'title': 'Gusano',
|
||||
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
|
||||
'duration': 292.86,
|
||||
'series': 'Agus & Lui: Churros y Crafts',
|
||||
'_old_archive_ids': ['rtveinfantil 7048976'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode()).decode('utf-8')
|
||||
self._manager = self._download_json(
|
||||
'http://www.rtve.es/odin/loki/' + user_agent_b64,
|
||||
None, 'Fetching manager info')['manager']
|
||||
|
||||
@staticmethod
|
||||
def _decrypt_url(png):
|
||||
encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
|
||||
while True:
|
||||
length = struct.unpack('!I', encrypted_data.read(4))[0]
|
||||
chunk_type = encrypted_data.read(4)
|
||||
if chunk_type == b'IEND':
|
||||
break
|
||||
data = encrypted_data.read(length)
|
||||
if chunk_type == b'tEXt':
|
||||
alphabet_data, text = data.split(b'\0')
|
||||
quality, url_data = text.split(b'%%')
|
||||
alphabet = []
|
||||
e = 0
|
||||
d = 0
|
||||
for l in alphabet_data.decode('iso-8859-1'):
|
||||
if d == 0:
|
||||
alphabet.append(l)
|
||||
d = e = (e + 1) % 4
|
||||
else:
|
||||
d -= 1
|
||||
url = ''
|
||||
f = 0
|
||||
e = 3
|
||||
b = 1
|
||||
for letter in url_data.decode('iso-8859-1'):
|
||||
if f == 0:
|
||||
l = int(letter) * 10
|
||||
f = 1
|
||||
else:
|
||||
if e == 0:
|
||||
l += int(letter)
|
||||
url += alphabet[l]
|
||||
e = (b + 3) % 4
|
||||
f = 0
|
||||
b += 1
|
||||
else:
|
||||
e -= 1
|
||||
|
||||
yield quality.decode(), url
|
||||
encrypted_data.read(4) # CRC
|
||||
|
||||
def _extract_png_formats(self, video_id):
|
||||
png = self._download_webpage(
|
||||
f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/videos/{video_id}.png',
|
||||
video_id, 'Downloading url information', query={'q': 'v2'})
|
||||
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
|
||||
formats = []
|
||||
for quality, video_url in self._decrypt_url(png):
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, 'dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': quality,
|
||||
'quality': q(quality),
|
||||
'url': video_url,
|
||||
})
|
||||
return formats
|
||||
def _get_subtitles(self, video_id):
|
||||
subtitle_data = self._download_json(
|
||||
f'https://api2.rtve.es/api/videos/{video_id}/subtitulos.json', video_id,
|
||||
'Downloading subtitles info')
|
||||
return traverse_obj(subtitle_data, ('page', 'items', ..., {
|
||||
'id': ('lang', {str}),
|
||||
'url': ('src', {url_or_none}),
|
||||
}, all, {subs_list_to_dict(lang='es')}))
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
info = self._download_json(
|
||||
metadata = self._download_json(
|
||||
f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json',
|
||||
video_id)['page']['items'][0]
|
||||
if info['state'] == 'DESPU':
|
||||
if metadata['state'] == 'DESPU':
|
||||
raise ExtractorError('The video is no longer available', expected=True)
|
||||
title = info['title'].strip()
|
||||
formats = self._extract_png_formats(video_id)
|
||||
formats, subtitles = self._extract_png_formats_and_subtitles(video_id)
|
||||
|
||||
subtitles = None
|
||||
sbt_file = info.get('sbtFile')
|
||||
if sbt_file:
|
||||
subtitles = self.extract_subtitles(video_id, sbt_file)
|
||||
self._merge_subtitles(self.extract_subtitles(video_id), target=subtitles)
|
||||
|
||||
is_live = info.get('live') is True
|
||||
is_infantil = urllib.parse.urlparse(url).path.startswith('/infantil/')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': info.get('image'),
|
||||
'subtitles': subtitles,
|
||||
'duration': float_or_none(info.get('duration'), 1000),
|
||||
'is_live': is_live,
|
||||
'series': info.get('programTitle'),
|
||||
**self._parse_metadata(metadata),
|
||||
'_old_archive_ids': [make_archive_id('rtveinfantil', video_id)] if is_infantil else None,
|
||||
}
|
||||
|
||||
def _get_subtitles(self, video_id, sub_file):
|
||||
subs = self._download_json(
|
||||
sub_file + '.json', video_id,
|
||||
'Downloading subtitles info')['page']['items']
|
||||
return dict(
|
||||
(s['lang'], [{'ext': 'vtt', 'url': s['src']}])
|
||||
for s in subs)
|
||||
|
||||
|
||||
class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
|
||||
class RTVEAudioIE(RTVEBaseIE):
|
||||
IE_NAME = 'rtve.es:audio'
|
||||
IE_DESC = 'RTVE audio'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/(?:[^/?#]+/){2}(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/',
|
||||
@ -180,9 +235,11 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
|
||||
'id': '5889192',
|
||||
'ext': 'mp3',
|
||||
'title': 'Códigos informáticos',
|
||||
'thumbnail': r're:https?://.+/1598856591583.jpg',
|
||||
'alt_title': 'Códigos informáticos - Escuchar ahora',
|
||||
'duration': 349.440,
|
||||
'series': 'A hombros de gigantes',
|
||||
'description': 'md5:72b0d7c1ca20fd327bdfff7ac0171afb',
|
||||
'thumbnail': 'https://img2.rtve.es/a/palabra-ingeniero-codigos-informaticos-270421_5889192.png',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/',
|
||||
@ -191,9 +248,11 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
|
||||
'id': '5791165',
|
||||
'ext': 'mp3',
|
||||
'title': 'Ignatius Farray',
|
||||
'alt_title': 'En Radio 3 - Ignatius Farray - 13/02/21 - escuchar ahora',
|
||||
'thumbnail': r're:https?://.+/1613243011863.jpg',
|
||||
'duration': 3559.559,
|
||||
'series': 'En Radio 3',
|
||||
'description': 'md5:124aa60b461e0b1724a380bad3bc4040',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/',
|
||||
@ -202,126 +261,101 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
|
||||
'id': '6082623',
|
||||
'ext': 'mp3',
|
||||
'title': 'Capítulo 26 y último: La muerte de Victor',
|
||||
'alt_title': 'Frankenstein o el moderno Prometeo - Capítulo 26 y último: La muerte de Victor',
|
||||
'thumbnail': r're:https?://.+/1632147445707.jpg',
|
||||
'duration': 3174.086,
|
||||
'series': 'Frankenstein o el moderno Prometeo',
|
||||
'description': 'md5:4ee6fcb82ebe2e46d267e1d1c1a8f7b5',
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_png_formats(self, audio_id):
|
||||
"""
|
||||
This function retrieves media related png thumbnail which obfuscate
|
||||
valuable information about the media. This information is decrypted
|
||||
via base class _decrypt_url function providing media quality and
|
||||
media url
|
||||
"""
|
||||
png = self._download_webpage(
|
||||
f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/audios/{audio_id}.png',
|
||||
audio_id, 'Downloading url information', query={'q': 'v2'})
|
||||
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
|
||||
formats = []
|
||||
for quality, audio_url in self._decrypt_url(png):
|
||||
ext = determine_ext(audio_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
audio_url, audio_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
audio_url, audio_id, 'dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': quality,
|
||||
'quality': q(quality),
|
||||
'url': audio_url,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
info = self._download_json(
|
||||
f'https://www.rtve.es/api/audios/{audio_id}.json',
|
||||
audio_id)['page']['items'][0]
|
||||
metadata = self._download_json(
|
||||
f'https://www.rtve.es/api/audios/{audio_id}.json', audio_id)['page']['items'][0]
|
||||
|
||||
formats, subtitles = self._extract_png_formats_and_subtitles(audio_id, media_type='audios')
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'title': info['title'].strip(),
|
||||
'thumbnail': info.get('thumbnail'),
|
||||
'duration': float_or_none(info.get('duration'), 1000),
|
||||
'series': try_get(info, lambda x: x['programInfo']['title']),
|
||||
'formats': self._extract_png_formats(audio_id),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**self._parse_metadata(metadata),
|
||||
}
|
||||
|
||||
|
||||
class RTVEInfantilIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
|
||||
IE_NAME = 'rtve.es:infantil'
|
||||
IE_DESC = 'RTVE infantil'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
|
||||
'md5': '5747454717aedf9f9fdf212d1bcfc48d',
|
||||
'info_dict': {
|
||||
'id': '3040283',
|
||||
'ext': 'mp4',
|
||||
'title': 'Maneras de vivir',
|
||||
'thumbnail': r're:https?://.+/1426182947956\.JPG',
|
||||
'duration': 357.958,
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||
}]
|
||||
|
||||
|
||||
class RTVELiveIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
|
||||
class RTVELiveIE(RTVEBaseIE):
|
||||
IE_NAME = 'rtve.es:live'
|
||||
IE_DESC = 'RTVE.es live streams'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)',
|
||||
r'https?://(?:www\.)?rtve\.es/play/videos/directo/[^/?#]+/(?P<id>[a-zA-Z0-9-]+)',
|
||||
]
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtve.es/directo/la-1/',
|
||||
'info_dict': {
|
||||
'id': 'la-1',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'live_status': 'is_live',
|
||||
'title': str,
|
||||
'description': str,
|
||||
'thumbnail': r're:https://img\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
|
||||
'timestamp': int,
|
||||
'upload_date': str,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'live stream',
|
||||
'params': {'skip_download': 'live stream'},
|
||||
}, {
|
||||
'url': 'https://www.rtve.es/play/videos/directo/deportes/tdp/',
|
||||
'info_dict': {
|
||||
'id': 'tdp',
|
||||
'ext': 'mp4',
|
||||
'live_status': 'is_live',
|
||||
'title': str,
|
||||
'description': str,
|
||||
'thumbnail': r're:https://img2\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
|
||||
'timestamp': int,
|
||||
'upload_date': str,
|
||||
},
|
||||
'params': {'skip_download': 'live stream'},
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/play/videos/directo/canales-lineales/la-1/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
|
||||
title = remove_start(title, 'Estoy viendo ')
|
||||
|
||||
vidplayer_id = self._search_regex(
|
||||
(r'playerId=player([0-9]+)',
|
||||
r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
|
||||
r'data-id=["\'](\d+)'),
|
||||
webpage, 'internal video ID')
|
||||
data_setup = self._search_json(
|
||||
r'<div[^>]+class="[^"]*videoPlayer[^"]*"[^>]*data-setup=\'',
|
||||
webpage, 'data_setup', video_id)
|
||||
|
||||
formats, subtitles = self._extract_png_formats_and_subtitles(data_setup['idAsset'])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': self._extract_png_formats(vidplayer_id),
|
||||
**self._search_json_ld(webpage, video_id, fatal=False),
|
||||
'title': self._html_extract_title(webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
|
||||
class RTVETelevisionIE(InfoExtractor):
|
||||
IE_NAME = 'rtve.es:television'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/?#]+/[^/?#]+/(?P<id>\d+).shtml'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
|
||||
'url': 'https://www.rtve.es/television/20091103/video-inedito-del-8o-programa/299020.shtml',
|
||||
'info_dict': {
|
||||
'id': '3069778',
|
||||
'id': '572515',
|
||||
'ext': 'mp4',
|
||||
'title': 'Documentos TV - La revolución del móvil',
|
||||
'duration': 3496.948,
|
||||
'title': 'Clase inédita',
|
||||
'duration': 335.817,
|
||||
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
|
||||
'series': 'El coro de la cárcel',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -332,11 +366,8 @@ class RTVETelevisionIE(InfoExtractor):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
alacarta_url = self._search_regex(
|
||||
r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&',
|
||||
webpage, 'alacarta url', default=None)
|
||||
if alacarta_url is None:
|
||||
raise ExtractorError(
|
||||
'The webpage doesn\'t contain any video', expected=True)
|
||||
play_url = self._html_search_meta('contentUrl', webpage)
|
||||
if play_url is None:
|
||||
raise ExtractorError('The webpage doesn\'t contain any video', expected=True)
|
||||
|
||||
return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())
|
||||
return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key())
|
||||
|
@ -513,7 +513,7 @@ class TVPVODBaseIE(InfoExtractor):
|
||||
|
||||
class TVPVODVideoIE(TVPVODBaseIE):
|
||||
IE_NAME = 'tvp:vod'
|
||||
_VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)/?(?:[?#]|$)'
|
||||
_VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek--?\d+,S-?\d+E-?\d+)?,(?P<id>\d+)/?(?:[?#]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
|
||||
@ -568,6 +568,9 @@ class TVPVODVideoIE(TVPVODBaseIE):
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': 're:https?://.+',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vod.tvp.pl/informacje-i-publicystyka,205/konskie-2025-debata-przedwyborcza-odcinki,2028435/odcinek--1,S01E-1,2028419',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -1,13 +1,21 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import clean_html, remove_end, unified_timestamp, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
parse_qs,
|
||||
remove_end,
|
||||
require,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class TvwIE(InfoExtractor):
|
||||
IE_NAME = 'tvw'
|
||||
_VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/',
|
||||
'md5': '9ceb94fe2bb7fd726f74f16356825703',
|
||||
@ -115,3 +123,43 @@ class TvwIE(InfoExtractor):
|
||||
'is_live': ('eventStatus', {lambda x: x == 'live'}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class TvwTvChannelsIE(InfoExtractor):
|
||||
IE_NAME = 'tvw:tvchannels'
|
||||
_VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://tvw.org/tvchannels/air/',
|
||||
'info_dict': {
|
||||
'id': 'air',
|
||||
'ext': 'mp4',
|
||||
'title': r're:TVW Cable Channel Live Stream',
|
||||
'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tvw.org/tvchannels/tvw2/',
|
||||
'info_dict': {
|
||||
'id': 'tvw2',
|
||||
'ext': 'mp4',
|
||||
'title': r're:TVW-2 Broadcast Channel',
|
||||
'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m3u8_url = traverse_obj(webpage, (
|
||||
{find_element(id='invintus-persistent-stream-frame', html=True)}, {extract_attributes},
|
||||
'src', {parse_qs}, 'encoder', 0, {json.loads}, 'live247URI', {url_or_none}, {require('stream url')}))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True),
|
||||
'title': remove_end(self._og_search_title(webpage, default=None), ' - TVW'),
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'is_live': True,
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user