Commit 6d7ad93b authored by born's avatar born
Browse files

Fixed indentation

parent 57490f92
Loading
Loading
Loading
Loading
+8 −9
Original line number Diff line number Diff line
@@ -5,8 +5,7 @@ import codecs
import sys
import nltk
import os
import matplotlib.pyplot as plt
from constantseveryEvent import KEYWORD_SEEDS, EVENTS_TO_RANGE
from constants import KEYWORD_SEEDS, EVENTS_TO_RANGE

path_to_newsroom_data = sys.argv[1]
corpustype = path_to_newsroom_data.split(".data")[0]
@@ -17,10 +16,10 @@ corpustype = corpustype.split("/")[-1]
#keyword_seed = KEYWORD_SEEDS[event_name]

ArticlePerTopic = {}
for x in EVENTS_TO_RANGE:
	ArticlePerTopic[x] = 1
    IDfile = codecs.open(x + "/"+x + "SimpleIDs.txt", "w" , "utf-8")
    IDfile.write("#"+topic+" #simple #" + str(EVENTS_TO_RANGE[topic][0]) +" - " + str(EVENTS_TO_RANGE[topic][1] + "\n")
for topic in EVENTS_TO_RANGE:
	ArticlePerTopic[topic] = 1
	IDfile = codecs.open(topic + "SimpleIDs.txt", "w" , "utf-8")
	IDfile.write("#"+topic+" #simple #" + str(EVENTS_TO_RANGE[topic][0]) +" - " + str(EVENTS_TO_RANGE[topic][1]) + "\n")
	IDfile.close()
with jsonl.open(path_to_newsroom_data, gzip = True) as train_file:
	#print(event_name + " Extracting for previous mentioned event could take some hours")
@@ -54,8 +53,8 @@ with jsonl.open(path_to_newsroom_data, gzip = True) as train_file:
					day = entry["date"][6:8]
					date = datetime.date(int(year),int(month),int(day))
					if date > earliest and date < latest:
                        output = codecs.open(event_source_name + "/"+event_source_name + "SimpleIDs.txt", "a" , "utf-8")
						output.write(str(entry[archive]) + " " + str(entry[title]) +"\n")
						output = codecs.open(event_source_name + "SimpleIDs.txt", "a" , "utf-8")
						output.write(str(entry["archive"]) + " " + str(entry["title"]) +"\n")
						ArticlePerTopic[event_source_name] = ArticlePerTopic[event_source_name] + 1
print(ArticlePerTopic)