#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 13 13:34:43 2021

Read the "Modeling Naive Psychology of Characters in Simple Commonsense
Stories" human-needs annotations (``allcharlinepairs.csv``) and write one
tab-separated line per (sentence, character) pair:

    <story_id>__sent<N> \t <sentence> \t <human-needs vote distribution>

NOTE(review): this module was recovered from a whitespace-mangled paste
(diff-viewer chrome, collapsed lines).  The control-flow nesting inside
``read_file`` is the only reconstruction consistent with the recovered
token stream, but it should be confirmed against the authors' repository.
"""

import glob
import os
import subprocess
import sys
import argparse
import re
import csv
import json
import gzip
import random
import operator
import collections
import itertools
from collections import defaultdict

import spacy

from ontology_create import onto_dict as onto_dict

# Added matching_lists & stop_word_list to create the concept list.
# NLTK English stop-word list, used to filter concept matches.
stop_word_list = [
    'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you',
    "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself',
    'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her',
    'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them',
    'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom',
    'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was',
    'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do',
    'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or',
    'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with',
    'about', 'against', 'between', 'into', 'through', 'during', 'before',
    'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out',
    'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once',
    'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both',
    'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor',
    'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't',
    'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now',
    'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't",
    'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn',
    "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma',
    'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan',
    "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren',
    "weren't", 'won', "won't", 'wouldn', "wouldn't",
]


def matching_lists(sentence_list):
    """Match lemmatised sentence words against ConceptNet concepts.

    Parameters
    ----------
    sentence_list : list of list of str
        Lemmatised sentences (one inner list of lemmas per sentence).

    Returns
    -------
    list of list of str
        For every sentence, the words that appear as keys of
        ``onto_dict`` and are not stop words.
    """
    matches = []
    for sentence in sentence_list:
        matches.append([word for word in sentence
                        if word in onto_dict and word not in stop_word_list])
    return matches


def _count_needs(labels, human_needs, distribution):
    """Add one vote to ``distribution`` for every label found in ``human_needs``.

    Labels are stripped first: after ``split(',')`` every label but the
    first carries a leading space.
    """
    for raw in labels:
        label = raw.strip()
        if label in human_needs:
            distribution[human_needs.index(label)] += 1


def read_file(data,
              output_path=r'C:\Users\PC\Desktop\Coli neu\SWP\experiment.txt'):
    """Parse the human-needs annotation CSV and write per-pair distributions.

    Parameters
    ----------
    data : str
        Path to ``allcharlinepairs.csv``.
    output_path : str, optional
        File the tab-separated output lines are written to.  Defaults to
        the original hard-coded location, so existing callers are
        unaffected.
    """
    out = []                # one vote distribution per flushed (sentence, char) pair
    input_sentence = []     # sentence text, parallel to `out`
    input_char = []         # character name, parallel to `out`
    human_needs = ['status', 'approval', 'tranquility', 'competition',
                   'health', 'family', 'romance', 'food', 'indep', 'power',
                   'order', 'curiosity', 'serenity', 'honor', 'belonging',
                   'contact', 'savings', 'idealism', 'rest']
    count_line = 0
    count_story = 0
    # BUG FIX: `count` was incremented in the character-changed branch
    # before ever being assigned (NameError); initialise it up front.
    count = 0
    context = []
    con = ''
    classification = []
    n = []
    motivation = {}
    line_num = ''
    story_ids = []
    ex_story_ids = []
    w = ''                  # story id of the pair currently being accumulated
    s = ''                  # sentence of the pair currently being accumulated
    temp_motivation = []
    indicator = False       # True while a (sentence, char) pair is being accumulated
    distribution = [0] * len(human_needs)

    with open(data, newline='') as csvfile:
        lines = csv.reader(csvfile)
        for line in lines:
            # --- story-id bookkeeping ---------------------------------
            if count_story == 0:
                count_story += 1
                story_id = line[0]
                ex_story_ids = line[0]
            elif story_id != line[0]:
                ex_story_ids = story_id
                story_id = line[0]
                count_story += 1
            else:
                story_id = line[0]

            # Clean the label cell: '["family, food"]' -> ['family', ' food'].
            for ch in ('[', ']', '"'):
                line[-1] = line[-1].replace(ch, '')
            line[-1] = line[-1].split(',')

            if line[-4] == 'yes':
                if indicator is False:
                    # First annotated row of a new pair: remember its fields.
                    s = line[-5]
                    c = line[2]
                    con = line[4]
                    line_num = line[1]
                    indicator = True
                if count_line == 0:
                    # Very first annotated row of the file.
                    count_line += 1
                    s = line[-5]
                    c = line[2]
                    con = line[4]
                    line_num = line[1]
                    w = line[0]
                    if con == '':
                        con = "No Context"
                        line_num = 1
                # Clean the motivation cell in place.
                for ch in ('[', ']', '"', ','):
                    line[-3] = line[-3].replace(ch, '')
                if s == line[-5]:
                    if c == line[2]:
                        # Same sentence & character: rebuild the distribution
                        # from this row's labels.
                        # NOTE(review): resetting here discards earlier rows
                        # for the same pair -- confirm this is intended.
                        distribution = [0] * len(human_needs)
                        for raw in line[-1]:
                            classification.append(str(raw))
                        # BUG FIX: this branch originally compared the labels
                        # un-stripped, so every label after the first (which
                        # carries a leading space) was silently missed.
                        _count_needs(line[-1], human_needs, distribution)
                    else:
                        # Character changed: flush the previous pair, start anew.
                        story_ids.append(w + '__sent' + str(line_num))
                        context.append(con)
                        count += 1
                        input_sentence.append(s)
                        input_char.append(c)
                        out.append(distribution)
                        s = line[-5]
                        c = line[2]
                        con = line[4]
                        line_num = line[1]
                        if con == '':
                            con = "No Context"
                            line_num = 1
                        distribution = [0] * len(human_needs)
                        _count_needs(line[-1], human_needs, distribution)
                else:
                    # Sentence changed.
                    # NOTE(review): the inner `s == line[-5]` re-check below is
                    # always False on this path in the reconstructed nesting;
                    # it is kept verbatim from the recovered source -- confirm
                    # against the original repository.
                    if s == line[-5]:
                        if c == line[2]:
                            con = line[4]
                            line_num = line[1]
                            w = line[0]
                            if con == '':
                                con = "No Context"
                                line_num = 1
                            for ch in ('[', ']', '"', ','):
                                line[-3] = line[-3].replace(ch, '')
                            temp_motivation.append(line[-3])
                            _count_needs(line[-1], human_needs, distribution)
                        else:
                            story_ids.append(w + '__sent' + str(line_num))
                            context.append(con)
                            input_sentence.append(s)
                            input_char.append(c)
                            out.append(distribution)
                            s = line[-5]
                            c = line[2]
                            con = line[4]
                            line_num = line[1]
                            w = line[0]
                            if con == '':
                                con = "No Context"
                                line_num = 1
                            distribution = [0] * len(human_needs)
                            _count_needs(line[-1], human_needs, distribution)
                    else:
                        story_ids.append(w + '__sent' + str(line_num))
                        context.append(con)
                        input_sentence.append(s)
                        input_char.append(c)
                        out.append(distribution)
                        s = line[-5]
                        c = line[2]
                        con = line[4]
                        line_num = line[1]
                        w = line[0]
                        if con == '':
                            con = "No Context"
                            line_num = 1
                        distribution = [0] * len(human_needs)
                        _count_needs(line[-1], human_needs, distribution)
            else:
                # Row carries no motivation annotation: flush whatever pair is
                # currently being accumulated (if any).
                if indicator is True:
                    indicator = False
                    story_ids.append(w + '__sent' + str(line_num))
                    context.append(con)
                    n = 1
                    input_sentence.append(s)
                    input_char.append(c)
                    out.append(distribution)
                    s = line[-5]
                    c = line[2]
                    con = line[4]
                    line_num = line[1]
                    w = line[0]
                    if con == '':
                        con = "No Context"
                        line_num = 1
                    distribution = [0] * len(human_needs)
                    _count_needs(line[-1], human_needs, distribution)
        else:
            # No more lines to be read from file: flush the final pending pair.
            story_ids.append(w + '__sent' + str(line_num))
            context.append(con)
            n = 1
            input_sentence.append(s)
            input_char.append(c)
            out.append(distribution)

    # Lemmatise every collected sentence with spaCy.
    nlp = spacy.load('en_core_web_sm')
    doc_list = []
    for sentence in input_sentence:
        doc_list.append([token.lemma_ for token in nlp(sentence)])

    # human_need_list holds, per pair, the needs with the highest vote count;
    # tru_list picks one of them at random as the final label.
    human_need_list = []
    count = 0
    with open(output_path, 'w') as file:
        # Collapse annotator votes: if any need got 2 or 3 votes, keep only
        # those needs (set to 1) and drop single votes.
        for i in range(len(out)):
            if sum(out[i]) != 0:
                if 2 in out[i] or 3 in out[i]:
                    # BUG FIX: the original iterated `for n, a in
                    # enumerate(...)`, clobbering the outer `n`/`a` variables.
                    for j, votes in enumerate(out[i]):
                        if votes == 1:
                            out[i][j] = 0
                        if votes == 2 or votes == 3:
                            out[i][j] = 1
                    count += 1

        for concepts in out:
            human_need_list.append(
                [human_needs[i] for i in range(len(concepts))
                 if concepts[i] == max(concepts) and concepts[i] != 0])

        tru_list = []
        for hn in human_need_list:
            tru_list.append(random.choice(hn) if hn else 'None')

        concept_list = matching_lists(doc_list)

        # Add here whatever you want to have in your file
        # (tru_list, story ids, context, concept_list ...).
        for i in range(len(out)):
            print(story_ids[i], '\t', input_sentence[i], '\t', out[i])
            file.write(str(story_ids[i]) + "\t" + str(input_sentence[i])
                       + "\t" + str(out[i]) + "\n")
    return


def main():
    """CLI entry point.

    Accepts an optional path to the annotation CSV; without an argument it
    falls back to the original hard-coded location, so the old no-argument
    invocation behaves exactly as before.
    """
    parser = argparse.ArgumentParser(
        description='Read the story human-needs annotation CSV.')
    parser.add_argument(
        'csvfile', nargs='?',
        default=(r'C:\Users\PC\Desktop\Coli neu\SWP'
                 r'\Multi-Hop-Knowledge-Paths-Human-Needs-master'
                 r'\csv_version\dev\motiv\allcharlinepairs.csv'),
        help='.csv file containing the annotated story/character line pairs')
    args = parser.parse_args()
    read_file(args.csvfile)


if __name__ == '__main__':
    main()
# NOTE(review): this file previously contained a second, byte-identical copy
# of the entire module here (shebang, imports, stop_word_list,
# matching_lists, read_file, main and a second `if __name__ == '__main__':
# main()` guard).  The duplicate re-defined every top-level name and would
# have executed main() twice; it has been removed.