Commit 550b441c authored by vhoepfl's avatar vhoepfl
Browse files

adding seed

parent ea166f08
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
import re
import random
# Seed the module-level RNG once at import time so that the random.shuffle
# call further down in this script produces the same ordering on every run
# (given the same input files).
random.seed(42)

filepaths_sme = [
    "data/sme/sme_wikipedia_2021_10K/sme_wikipedia_2021_10K-sentences.txt", 
@@ -32,7 +33,6 @@ def leipzig_files_to_corpus(filepaths, out_corpus_path):
                random.shuffle(lines)
            fw.writelines(lines)
        
        
# Build one corpus file per file list: filepaths_sme -> data/corpus_sme.txt,
# then filepaths_nob -> data/corpus_nob.txt.
# NOTE(review): this comment previously said to use only one of
# filepaths_nob / filepaths_sme and to adapt the output path by hand; as shown
# here both calls run, so that instruction looks stale -- confirm.
# NOTE(review): the RNG is seeded once at the top of the script, so swapping
# the order of these calls may change the shuffled output -- confirm.
leipzig_files_to_corpus(filepaths_sme, "data/corpus_sme.txt")
leipzig_files_to_corpus(filepaths_nob, "data/corpus_nob.txt")