"""Command-line entry point for Karaokatalog deduplication.

Source path: karaokatalog/deduplicate/__main__.py

The script loads a song library from the directory passed as the first
command-line argument, asks ``find_duplicates`` for groups of duplicates,
splits every group into equivalence classes with ``Song.has_identic_files``
and collects the instructions produced by ``prune``. The instructions are
only counted and logged; nothing is executed yet (see the TODO in ``main``).
"""

import logging
import sys
from pathlib import Path

from tqdm import tqdm

from karaokatalog.Library import Library
from karaokatalog.Song import Song
from karaokatalog.deduplicate.find_duplicates import find_duplicates
from karaokatalog.deduplicate.prune import prune
from karaokatalog.util.get_equivalence_classes import get_equivalence_classes

logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s", level=logging.INFO
)


def main(argv=None):
    """Run the deduplication pipeline and log how many songs would be pruned.

    Args:
        argv: Argument vector with the program name at index 0 and the
            library directory at index 1. Defaults to ``sys.argv``.

    Raises:
        SystemExit: If no library directory was given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        # Fail with a usage message instead of a bare IndexError traceback.
        sys.exit("usage: python -m karaokatalog.deduplicate LIBRARY_DIR")

    logging.info("Karaokatalog Deduplication started")

    logging.info("Loading library")
    library = Library.from_dir(Path(argv[1]))
    logging.info("Library loaded")

    logging.info("Finding duplicates (songs with identical title and artist)")
    duplicates = find_duplicates(library)
    logging.info("Duplicates found")

    logging.info("Finding exact duplicates (files are 100% identical)")
    # TODO Make this abortable and restartable
    # Flatten: every group of duplicates may yield several equivalence classes.
    exact_duplicates = tuple(
        equivalence_class
        for songs in tqdm(duplicates.values(), unit=" duplicates")
        for equivalence_class in get_equivalence_classes(
            songs, Song.has_identic_files
        )
    )
    logging.info("Exact duplicates found")

    logging.info("Determining songs to prune")
    # Flatten: every equivalence class may yield several pruning instructions.
    pruning_instructions = tuple(
        instruction
        for equivalence_class in tqdm(exact_duplicates, unit=" exact duplicates")
        for instruction in prune(equivalence_class)
    )
    # Lazy %-formatting; the emitted message is identical to the f-string form.
    logging.info(
        "%d exactly duplicated songs will be deleted", len(pruning_instructions)
    )

    # TODO Call all pruning_instructions, to actually delete the files

    logging.info("Karaokatalog Deduplication done")


if __name__ == "__main__":
    main()
"""Command-line entry point for Karaokatalog deduplication.

Source path: karaokatalog/deduplicate/__main__.py

The script loads a song library from the directory passed as the first
command-line argument, asks ``find_duplicates`` for groups of duplicates,
splits every group into equivalence classes with ``Song.has_identic_files``
and collects the instructions produced by ``prune``. The instructions are
only counted and logged; nothing is executed yet (see the TODO in ``main``).
"""

import logging
import sys
from pathlib import Path

from tqdm import tqdm

from karaokatalog.Library import Library
from karaokatalog.Song import Song
from karaokatalog.deduplicate.find_duplicates import find_duplicates
from karaokatalog.deduplicate.prune import prune
from karaokatalog.util.get_equivalence_classes import get_equivalence_classes

logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s", level=logging.INFO
)


def main(argv=None):
    """Run the deduplication pipeline and log how many songs would be pruned.

    Args:
        argv: Argument vector with the program name at index 0 and the
            library directory at index 1. Defaults to ``sys.argv``.

    Raises:
        SystemExit: If no library directory was given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        # Fail with a usage message instead of a bare IndexError traceback.
        sys.exit("usage: python -m karaokatalog.deduplicate LIBRARY_DIR")

    logging.info("Karaokatalog Deduplication started")

    logging.info("Loading library")
    library = Library.from_dir(Path(argv[1]))
    logging.info("Library loaded")

    logging.info("Finding duplicates (songs with identical title and artist)")
    duplicates = find_duplicates(library)
    logging.info("Duplicates found")

    logging.info("Finding exact duplicates (files are 100% identical)")
    # TODO Make this abortable and restartable
    # Flatten: every group of duplicates may yield several equivalence classes.
    exact_duplicates = tuple(
        equivalence_class
        for songs in tqdm(duplicates.values(), unit=" duplicates")
        for equivalence_class in get_equivalence_classes(
            songs, Song.has_identic_files
        )
    )
    logging.info("Exact duplicates found")

    logging.info("Determining songs to prune")
    # Flatten: every equivalence class may yield several pruning instructions.
    pruning_instructions = tuple(
        instruction
        for equivalence_class in tqdm(exact_duplicates, unit=" exact duplicates")
        for instruction in prune(equivalence_class)
    )
    # Lazy %-formatting; the emitted message is identical to the f-string form.
    logging.info(
        "%d exactly duplicated songs will be deleted", len(pruning_instructions)
    )

    # TODO Call all pruning_instructions, to actually delete the files

    logging.info("Karaokatalog Deduplication done")


if __name__ == "__main__":
    main()