Loading combine_sources_into_corpus.py +1 −1 Original line number Diff line number Diff line import re import random random.seed(42) filepaths_sme = [ "data/sme/sme_wikipedia_2021_10K/sme_wikipedia_2021_10K-sentences.txt", Loading Loading @@ -32,7 +33,6 @@ def leipzig_files_to_corpus(filepaths, out_corpus_path): random.shuffle(lines) fw.writelines(lines) # Use either filepaths_nob or filepaths_sme, adapt output path to _sme leipzig_files_to_corpus(filepaths_sme, "data/corpus_sme.txt") leipzig_files_to_corpus(filepaths_nob, "data/corpus_nob.txt") Loading
combine_sources_into_corpus.py +1 −1 Original line number Diff line number Diff line import re import random random.seed(42) filepaths_sme = [ "data/sme/sme_wikipedia_2021_10K/sme_wikipedia_2021_10K-sentences.txt", Loading Loading @@ -32,7 +33,6 @@ def leipzig_files_to_corpus(filepaths, out_corpus_path): random.shuffle(lines) fw.writelines(lines) # Use either filepaths_nob or filepaths_sme, adapt output path to _sme leipzig_files_to_corpus(filepaths_sme, "data/corpus_sme.txt") leipzig_files_to_corpus(filepaths_nob, "data/corpus_nob.txt")