From 1d3751c3fe50b203d3e2bff71d866c8c500f8288 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 1 Jun 2021 18:05:41 +0530 Subject: [PATCH] Escape URLs in `sanitized_Request`, not `sanitize_url` d2558234cf5dd12d6896eed5427b7dcdb3ab7b5a added escaping of URLs while sanitizing. However, `sanitize_url` may not always receive an actual URL. Eg: When using `youtube-dl "search query" --default-search ytsearch`, `search query` gets escaped to `search%20query` before being prefixed with `ytsearch:` which is not the intended behavior. So the escaping is moved to `sanitized_Request` instead. --- test/test_utils.py | 1 + youtube_dl/extractor/generic.py | 19 +++++++++++++++++++ youtube_dl/utils.py | 4 ++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 9d364c863..ea2b96ed2 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -250,6 +250,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar') self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar') self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar') + self.assertEqual(sanitize_url('foo bar'), 'foo bar') def test_expand_path(self): def env(var): diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0e473e952..b01900afa 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2320,6 +2320,25 @@ class GenericIE(InfoExtractor): 'height': 720, 'age_limit': 18, }, + }, { + # would like to use the yt-dl test video but searching for + # '"\'/\\รคโ†ญ๐•' fails, so using an old vid from YouTube Korea + 'note': 'Test default search', + 'url': 'Shorts๋กœ ํ—ˆ๋ฝ ํ•„์š”์—†์ด ๋†€์ž! (BTSํŽธ)', + 'info_dict': { + 'id': 'usDGO4Zb-dc', + 'ext': 'mp4', + 'title': 'YouTube Shorts๋กœ ํ—ˆ๋ฝ ํ•„์š”์—†์ด ๋†€์ž! (BTSํŽธ)', + 'description': 'md5:96e31607eba81ab441567b5e289f4716', + 'upload_date': '20211107', + 'uploader': 'YouTube Korea', + 'location': '๋Œ€ํ•œ๋ฏผ๊ตญ', + }, + 'params': { + 'default_search': 'ytsearch', + 'skip_download': True, + }, + 'expected_warnings': ['uploader id'], }, ] diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4edbfa27b..761edcd49 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2176,11 +2176,11 @@ def sanitize_url(url): for mistake, fixup in COMMON_TYPOS: if re.match(mistake, url): return re.sub(mistake, fixup, url) - return escape_url(url) + return url def sanitized_Request(url, *args, **kwargs): - return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs) + return compat_urllib_request.Request(escape_url(sanitize_url(url)), *args, **kwargs) def expand_path(s):