mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-04-30 07:46:38 +02:00
Compare commits
6 Commits
db6d1f145a
...
74e90dd9b8
Author | SHA1 | Date | |
---|---|---|---|
|
74e90dd9b8 | ||
|
1d45e30537 | ||
|
3c1c75ecb8 | ||
|
7faa18b83d | ||
|
a473e59233 | ||
|
45f01de00e |
@ -659,6 +659,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
|
||||
self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
|
||||
self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
|
||||
self.assertEqual(url_or_none('ws://foo.de'), 'ws://foo.de')
|
||||
self.assertEqual(url_or_none('wss://foo.de'), 'wss://foo.de')
|
||||
|
||||
def test_parse_age_limit(self):
|
||||
self.assertEqual(parse_age_limit(None), None)
|
||||
|
@ -85,6 +85,7 @@ class NiconicoLiveFD(FileDownloader):
|
||||
'quality': live_quality,
|
||||
'protocol': 'hls+fmp4',
|
||||
'latency': live_latency,
|
||||
'accessRightMethod': 'single_cookie',
|
||||
'chasePlay': False,
|
||||
},
|
||||
'room': {
|
||||
|
@ -903,6 +903,7 @@ from .ivi import (
|
||||
IviIE,
|
||||
)
|
||||
from .ivideon import IvideonIE
|
||||
from .ivoox import IvooxIE
|
||||
from .iwara import (
|
||||
IwaraIE,
|
||||
IwaraPlaylistIE,
|
||||
@ -960,7 +961,10 @@ from .kick import (
|
||||
)
|
||||
from .kicker import KickerIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kika import KikaIE
|
||||
from .kika import (
|
||||
KikaIE,
|
||||
KikaPlaylistIE,
|
||||
)
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .kommunetv import KommunetvIE
|
||||
@ -1061,6 +1065,7 @@ from .loom import (
|
||||
from .lovehomeporn import LoveHomePornIE
|
||||
from .lrt import (
|
||||
LRTVODIE,
|
||||
LRTRadioIE,
|
||||
LRTStreamIE,
|
||||
)
|
||||
from .lsm import (
|
||||
|
@ -1570,6 +1570,8 @@ class InfoExtractor:
|
||||
"""Yield all json ld objects in the html"""
|
||||
if default is not NO_DEFAULT:
|
||||
fatal = False
|
||||
if not fatal and not isinstance(html, str):
|
||||
return
|
||||
for mobj in re.finditer(JSON_LD_RE, html):
|
||||
json_ld_item = self._parse_json(
|
||||
mobj.group('json_ld'), video_id, fatal=fatal,
|
||||
|
78
yt_dlp/extractor/ivoox.py
Normal file
78
yt_dlp/extractor/ivoox.py
Normal file
@ -0,0 +1,78 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, parse_iso8601, url_or_none, urljoin
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class IvooxIE(InfoExtractor):
    # Two accepted URL shapes: the "<slug>_rf_<id>_1.html" episode pages
    # (optionally under a two-character path prefix) and go.ivoox.com/rf/<id>.
    # Both capture the numeric audio id as the "id" group.
    _VALID_URL = (
        r'https?://(?:www\.)?ivoox\.com/(?:\w{2}/)?[^/?#]+_rf_(?P<id>[0-9]+)_1\.html',
        r'https?://go\.ivoox\.com/rf/(?P<id>[0-9]+)',
    )
    _TESTS = [{
        'url': 'https://www.ivoox.com/dex-08x30-rostros-del-mal-los-asesinos-en-audios-mp3_rf_143594959_1.html',
        'md5': '993f712de5b7d552459fc66aa3726885',
        'info_dict': {
            'id': '143594959',
            'ext': 'mp3',
            'timestamp': 1742731200,
            'channel': 'DIAS EXTRAÑOS con Santiago Camacho',
            'title': 'DEx 08x30 Rostros del mal: Los asesinos en serie que aterrorizaron España',
            'description': 'md5:eae8b4b9740d0216d3871390b056bb08',
            'uploader': 'Santiago Camacho',
            'thumbnail': 'https://static-1.ivoox.com/audios/c/d/5/2/cd52f46783fe735000c33a803dce2554_XXL.jpg',
            'upload_date': '20250323',
            'episode': 'DEx 08x30 Rostros del mal: Los asesinos en serie que aterrorizaron España',
            'duration': 11837,
            'tags': ['españa', 'asesinos en serie', 'arropiero', 'historia criminal', 'mataviejas'],
        },
    }, {
        'url': 'https://go.ivoox.com/rf/143594959',
        'only_matching': True,
    }, {
        'url': 'https://www.ivoox.com/en/campodelgas-28-03-2025-audios-mp3_rf_144036942_1.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        audio_id = self._match_id(url)

        # Both the page and the download-url endpoint are fetched non-fatally,
        # so either source may be missing and the other is still used.
        webpage = self._download_webpage(url, audio_id, fatal=False)
        nuxt_audio = self._search_nuxt_data(
            webpage, audio_id, fatal=False, traverse=('data', 0, 'data', 'audio'))
        api_response = self._download_json(
            f'https://vcore-web.ivoox.com/v1/public/audios/{audio_id}/download-url', audio_id, fatal=False,
            note='Fetching direct download link', headers={'Referer': url})

        # Gather candidate media paths from both sources; the set drops duplicates
        api_paths = traverse_obj(api_response, ('data', 'downloadUrl', {str}, filter, all))
        nuxt_paths = traverse_obj(nuxt_audio, (('downloadUrl', 'mediaUrl'), {str}, filter))
        unique_paths = {*api_paths, *nuxt_paths}

        # Paths are resolved against the site root; each format carries the page as Referer
        formats = [{
            'url': urljoin('https://ivoox.com', media_path),
            'http_headers': {'Referer': url},
        } for media_path in unique_paths]

        # Lowest-priority metadata: data-prm-* attributes and OpenGraph tags from the HTML
        page_meta = {
            'uploader': self._html_search_regex(r'data-prm-author="([^"]+)"', webpage, 'author', default=None),
            'timestamp': parse_iso8601(
                self._html_search_regex(r'data-prm-pubdate="([^"]+)"', webpage, 'timestamp', default=None)),
            'channel': self._html_search_regex(r'data-prm-podname="([^"]+)"', webpage, 'channel', default=None),
            'title': self._html_search_regex(r'data-prm-title="([^"]+)"', webpage, 'title', default=None),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'description': self._og_search_description(webpage, default=None),
        }
        json_ld_meta = self._search_json_ld(webpage, audio_id, default={})
        nuxt_meta = traverse_obj(nuxt_audio, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'thumbnail': ('image', {url_or_none}),
            'timestamp': ('uploadDate', {parse_iso8601(delimiter=' ')}),
            'duration': ('duration', {int_or_none}),
            'tags': ('tags', ..., 'name', {str}),
        })

        # Later sources override earlier ones: page attributes < JSON-LD < Nuxt data
        return {
            'id': audio_id,
            'formats': formats,
            **page_meta,
            **json_ld_meta,
            **nuxt_meta,
        }
|
@ -1,3 +1,5 @@
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@ -124,3 +126,43 @@ class KikaIE(InfoExtractor):
|
||||
'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class KikaPlaylistIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?kika\.de/[\w-]+/(?P<id>[a-z-]+\d+)'

    _TESTS = [{
        'url': 'https://www.kika.de/logo/logo-die-welt-und-ich-562',
        'info_dict': {
            'id': 'logo-die-welt-und-ich-562',
            'title': 'logo!',
            'description': 'md5:7b9d7f65561b82fa512f2cfb553c397d',
        },
        'playlist_count': 100,
    }]

    def _entries(self, playlist_url, playlist_id):
        # Follow the 'links' -> 'next' URL from page to page until none is given
        page_num = 1
        while True:
            page_data = self._download_json(playlist_url, playlist_id, note=f'Downloading page {page_num}')

            # Only items exposing a usable API URL are turned into entries
            usable_items = traverse_obj(page_data, ('content', lambda _, v: url_or_none(v['api']['url'])))
            for video in usable_items:
                entry_meta = traverse_obj(video, {
                    'id': ('id', {str}),
                    'title': ('title', {str}),
                    'duration': ('duration', {int_or_none}),
                    'timestamp': ('date', {parse_iso8601}),
                })
                yield self.url_result(video['api']['url'], ie=KikaIE, **entry_meta)

            playlist_url = traverse_obj(page_data, ('links', 'next', {url_or_none}))
            if not playlist_url:
                return
            page_num += 1

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        brand_data = self._download_json(
            f'https://www.kika.de/_next-api/proxy/v1/brands/{playlist_id}', playlist_id)

        # The brand document points at the first page of the video listing
        first_page_url = brand_data['videoSubchannel']['videosPageUrl']
        entries = self._entries(first_page_url, playlist_id)

        return self.playlist_result(
            entries, playlist_id,
            title=brand_data.get('title'), description=brand_data.get('description'))
|
||||
|
@ -2,8 +2,11 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
merge_dicts,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@ -80,7 +83,7 @@ class LRTVODIE(LRTBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, video_id = self._match_valid_url(url).groups()
|
||||
path, video_id = self._match_valid_url(url).group('path', 'id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
media_url = self._extract_js_var(webpage, 'main_url', path)
|
||||
@ -106,3 +109,42 @@ class LRTVODIE(LRTBaseIE):
|
||||
}
|
||||
|
||||
return merge_dicts(clean_info, jw_data, json_ld_data)
|
||||
|
||||
|
||||
class LRTRadioIE(LRTBaseIE):
    _VALID_URL = r'https?://(?:www\.)?lrt\.lt/radioteka/irasas/(?P<id>\d+)/(?P<path>[^?#/]+)'
    _TESTS = [{
        # m3u8 download
        'url': 'https://www.lrt.lt/radioteka/irasas/2000359728/nemarios-eiles-apie-pragarus-ir-skaistyklas-su-aiste-kiltinaviciute',
        'info_dict': {
            'id': '2000359728',
            'ext': 'm4a',
            'title': 'Nemarios eilės: apie pragarus ir skaistyklas su Aiste Kiltinavičiūte',
            'description': 'md5:5eee9a0e86a55bf547bd67596204625d',
            'timestamp': 1726143120,
            'upload_date': '20240912',
            'tags': 'count:5',
            'thumbnail': r're:https?://.+/.+\.jpe?g',
            'categories': ['Daiktiniai įrodymai'],
        },
    }, {
        'url': 'https://www.lrt.lt/radioteka/irasas/2000304654/vakaras-su-knyga-svetlana-aleksijevic-cernobylio-malda-v-dalis?season=%2Fmediateka%2Faudio%2Fvakaras-su-knyga%2F2023',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        path = mobj.group('path')

        # The media API is queried with the /mediateka/ form of the record path
        api_query = {'url': f'/mediateka/irasas/{video_id}/{path}'}
        media = self._download_json(
            'https://www.lrt.lt/radioteka/api/media', video_id, query=api_query)

        def m3u8_formats(manifest_url):
            return self._extract_m3u8_formats(manifest_url, video_id)

        # Mapping of info-dict fields to their location in the API response
        field_map = {
            'id': ('id', {int}, {str_or_none}),
            'title': ('title', {str}),
            'tags': ('tags', ..., 'name', {str}),
            'categories': ('playlist_item', 'category', {str}, filter, all, filter),
            'description': ('content', {clean_html}, {str}),
            # Dots in the date are swapped for slashes before it is parsed
            'timestamp': ('date', {lambda date_str: date_str.replace('.', '/')}, {unified_timestamp}),
            'thumbnail': ('playlist_item', 'image', {urljoin('https://www.lrt.lt')}),
            'formats': ('playlist_item', 'file', {m3u8_formats}),
        }
        return traverse_obj(media, field_map)
|
||||
|
@ -27,6 +27,7 @@ from ..utils import (
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
@ -985,6 +986,7 @@ class NiconicoLiveIE(InfoExtractor):
|
||||
'quality': 'abr',
|
||||
'protocol': 'hls+fmp4',
|
||||
'latency': latency,
|
||||
'accessRightMethod': 'single_cookie',
|
||||
'chasePlay': False,
|
||||
},
|
||||
'room': {
|
||||
@ -1005,6 +1007,7 @@ class NiconicoLiveIE(InfoExtractor):
|
||||
if data.get('type') == 'stream':
|
||||
m3u8_url = data['data']['uri']
|
||||
qualities = data['data']['availableQualities']
|
||||
cookies = data['data']['cookies']
|
||||
break
|
||||
elif data.get('type') == 'disconnect':
|
||||
self.write_debug(recv)
|
||||
@ -1043,6 +1046,11 @@ class NiconicoLiveIE(InfoExtractor):
|
||||
**res,
|
||||
})
|
||||
|
||||
for cookie in cookies:
|
||||
self._set_cookie(
|
||||
cookie['domain'], cookie['name'], cookie['value'],
|
||||
expire_time=unified_timestamp(cookie['expires']), path=cookie['path'], secure=cookie['secure'])
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
|
||||
for fmt, q in zip(formats, reversed(qualities[1:])):
|
||||
fmt.update({
|
||||
|
@ -2044,7 +2044,7 @@ def url_or_none(url):
|
||||
if not url or not isinstance(url, str):
|
||||
return None
|
||||
url = url.strip()
|
||||
return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
|
||||
return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?|wss?):)?//', url) else None
|
||||
|
||||
|
||||
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
|
||||
|
Loading…
x
Reference in New Issue
Block a user