Browse Source

[utils] Sanitize look-alike Unicode glyphs in non-ID filename fields when --restrict-filenames is set

Implements https://github.com/ytdl-org/youtube-dl/issues/31216#issuecomment-1236102822, which has a test.
pull/31304/head
dirkf 4 months ago committed by GitHub
parent
commit
c94a459a24
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      youtube_dl/utils.py

4
youtube_dl/utils.py

@@ -33,6 +33,7 @@ import sys
import tempfile
import time
import traceback
import unicodedata
import xml.etree.ElementTree
import zlib
@@ -2118,6 +2119,9 @@ def sanitize_filename(s, restricted=False, is_id=False):
return '_'
return char
# Replace look-alike Unicode glyphs
if restricted and not is_id:
s = unicodedata.normalize('NFKC', s)
# Handle timestamps
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
result = ''.join(map(replace_insane, s))

Loading…
Cancel
Save