Verified commit 85ba8659, authored by Jakob Moser
Browse files

Implement recoding

parent bbdaa34d
Loading
Loading
Loading
Loading
+31 −1
Original line number Diff line number Diff line
@@ -3,6 +3,36 @@ from pathlib import Path

from karaokatalog.instructions.RecodeInstruction import RecodeInstruction

# Candidate encodings, listed in the order in which guess_encoding() attempts them.
ENCODINGS_TO_TRY = ("utf-8", "cp1252")

# Byte Order Mark: serves no purpose in UTF-8, but some files still start with it.
BOM = "\ufeff"


def try_reading_file_with_encoding(path: Path, encoding: str) -> str:
    """Return the complete text of ``path``, decoded using ``encoding``.

    Raises:
        UnicodeDecodeError: If the file's bytes cannot be decoded with
            ``encoding``.
    """
    return path.read_text(encoding=encoding)


def guess_encoding(path: Path) -> str:
    """Guess the text encoding of the file at ``path``.

    Each entry of ``ENCODINGS_TO_TRY`` is tried in order; the first one that
    decodes the whole file without a ``UnicodeDecodeError`` wins. A file that
    decodes as UTF-8 but starts with a byte order mark is reported as
    ``"utf-8-sig"`` rather than ``"utf-8"``.

    Raises:
        UnicodeError: If none of the candidate encodings can decode the file.
    """
    for candidate in ENCODINGS_TO_TRY:
        try:
            text = try_reading_file_with_encoding(path, candidate)
        except UnicodeDecodeError:
            # Not the right encoding; move on to the next candidate.
            continue

        starts_with_bom = candidate == "utf-8" and text.startswith(BOM)
        return "utf-8-sig" if starts_with_bom else candidate

    raise UnicodeError("Could not guess encoding.")


def recode(paths: Sequence[Path]) -> Sequence[RecodeInstruction]:
    """Create recode instructions for all files that are not plain UTF-8.

    The encoding of every path is guessed with ``guess_encoding``. Files
    guessed as ``"utf-8"`` are skipped; every other file (including UTF-8
    files with a byte order mark, which are guessed as ``"utf-8-sig"``) gets
    an instruction to convert it from its guessed encoding to ``"utf-8"``.

    Args:
        paths: The files to inspect.

    Returns:
        One ``RecodeInstruction`` per file whose guessed encoding is not
        ``"utf-8"``, in the order of ``paths``. Empty if ``paths`` is empty.

    Raises:
        UnicodeError: Propagated from ``guess_encoding`` when the encoding
            of a file cannot be guessed.
    """
    # The previous placeholder ``raise NotImplementedError()`` preceded this
    # return and made the actual implementation unreachable.
    return [
        RecodeInstruction(path, old_encoding=old_encoding, new_encoding="utf-8")
        for path in paths
        if (old_encoding := guess_encoding(path)) != "utf-8"
    ]