"""Guess the text encoding of files and plan their conversion to UTF-8.

Source path: ``karaokatalog/recode/recode.py``.
"""

# NOTE(review): the visible diff hunk starts at line 3 of the file, so the
# original import of ``Sequence`` is not shown -- presumably it lives on
# line 1; confirm it is ``collections.abc`` and not ``typing``.
from collections.abc import Sequence
from pathlib import Path

from karaokatalog.instructions.RecodeInstruction import RecodeInstruction

# Candidate encodings, tried in this order; the first one that decodes the
# whole file without error is taken as the guess.
ENCODINGS_TO_TRY = (
    "utf-8",
    "cp1252",
)
BOM = "\ufeff"  # Byte Order Mark, totally useless for UTF-8, still sometimes there


def try_reading_file_with_encoding(path: Path, encoding: str) -> str:
    """Return the complete text of *path*, decoded with *encoding*.

    Args:
        path: File to read.
        encoding: Name of the codec used to decode the file.

    Returns:
        The decoded file content.

    Raises:
        UnicodeDecodeError: If the file content is not valid in *encoding*.
    """
    with path.open("r", encoding=encoding) as f:
        return f.read()


def guess_encoding(path: Path) -> str:
    """Return the first encoding from ``ENCODINGS_TO_TRY`` that decodes *path*.

    A file that decodes as UTF-8 but starts with a byte order mark is
    reported as ``"utf-8-sig"`` rather than ``"utf-8"``.

    Args:
        path: File whose encoding should be guessed.

    Returns:
        ``"utf-8-sig"``, or the name of the matching entry of
        ``ENCODINGS_TO_TRY``.

    Raises:
        UnicodeError: If none of the candidate encodings can decode the file.
    """
    for encoding_to_try in ENCODINGS_TO_TRY:
        # Keep the try body minimal: only the read can signal a wrong guess.
        try:
            content = try_reading_file_with_encoding(path, encoding_to_try)
        except UnicodeDecodeError:
            continue  # This was not the right encoding, let's try again

        if encoding_to_try == "utf-8" and content.startswith(BOM):
            return "utf-8-sig"
        return encoding_to_try

    raise UnicodeError("Could not guess encoding.")


def recode(paths: Sequence[Path]) -> Sequence[RecodeInstruction]:
    """Build recode instructions for every file that is not plain UTF-8.

    Files whose guessed encoding is exactly ``"utf-8"`` are skipped; every
    other file (including ``"utf-8-sig"``) gets an instruction with
    ``new_encoding="utf-8"``.

    Args:
        paths: Files to inspect.

    Returns:
        One ``RecodeInstruction`` per file whose guessed encoding differs
        from ``"utf-8"``, in the order of *paths*.

    Raises:
        UnicodeError: Propagated from ``guess_encoding`` when a file matches
            none of the candidate encodings.
    """
    # The stale ``raise NotImplementedError()`` placeholder (the line removed
    # by the diff) is gone, so the implementation below is reachable.
    return [
        RecodeInstruction(path, old_encoding=old_encoding, new_encoding="utf-8")
        for path in paths
        if (old_encoding := guess_encoding(path)) != "utf-8"
    ]