Commit efd1bc96 authored by kuehner's avatar kuehner
Browse files

Upload New File

parent a1f65927
Loading
Loading
Loading
Loading

read_humans.py

0 → 100644
+345 −0
Original line number Diff line number Diff line
#!/usr/bin/env python
# coding: utf-8

# In[14]:


#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 13 13:34:43 2021
"""
#python script to read the "Modeling Naive Psychology of Characters in Simple Commonsense Stories Human needs"
#!/usr/bin/env python3
import glob
import os
import subprocess
import sys
import argparse
import re
import csv
import json
import gzip
import random
import operator
import collections
import itertools
from collections import defaultdict
from ontology_create import onto_dict as onto_dict
import spacy


###Added matching_list & stop_word_list to create the concept list

# English stop words (NLTK-style list) excluded from ConceptNet concept
# matching in matching_lists() below, so that function words never count
# as concepts even when they appear as keys in onto_dict.
stop_word_list = ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't"]
def matching_lists(sentence_list):
    """
    Match words from tokenized sentences against ConceptNet concepts.

    For every sentence (a list of word/lemma strings), collect the words
    that appear as keys in ``onto_dict`` (the ConceptNet ontology mapping
    imported from ``ontology_create``) and are not English stop words.

    Parameters
    ----------
    sentence_list : list of list of str
        Tokenized sentences (e.g. lemma lists produced by spaCy).

    Returns
    -------
    matches : list of list of str
        One inner list per input sentence, containing that sentence's
        words that match ConceptNet concepts. Inner lists may be empty.
    """
    # Hoist the stop words into a set once: `word in stop_word_list` was an
    # O(len(list)) scan per token; set membership is O(1).
    stop_words = set(stop_word_list)
    # `word in onto_dict` replaces the unidiomatic `word in onto_dict.keys()`.
    return [
        [word for word in sentence
         if word in onto_dict and word not in stop_words]
        for sentence in sentence_list
    ]


def read_file(data):
    """
    Read the human-needs story annotation CSV, aggregate label counts,
    and write one tab-separated line per aggregated sentence to a
    hard-coded output file (experiment.txt).

    Column layout inferred from usage — TODO confirm against the corpus:
      line[0]  story id, line[1] sentence/line number, line[2] character,
      line[4]  context, line[-5] sentence text, line[-4] 'yes'/'no'
      motivation flag, line[-3] motivation text, line[-1] bracketed,
      comma-separated human-need labels.

    Rows are processed as a state machine keyed on (sentence, character):
    consecutive rows for the same sentence/character accumulate their
    labels into `distribution` (counts aligned with `human_needs`); when
    the sentence or character changes, the accumulated record is flushed
    into the parallel lists story_ids / context / input_sentence /
    input_char / out, and accumulation restarts from the current row.

    After reading, sentences are lemmatized with spaCy, distributions
    containing a 2 or 3 are binarized, and story_id / sentence /
    distribution triples are printed and written to the output file.

    Returns None (results are emitted via print and the file write).
    """
    graph = []
    clean_graph=[]
    n=0
    a=0
    out=[]
    input_sentence=[]
    input_char=[]
    tags=[]
    tag=''
    flag=False
    #human_needs=["curiosity","serenity","idealism","independence","competition","honor","approval","power","status","romance","belonging","family","social contract","health","savings","order","safety","food","rest","none"]
    #human_needs=['status', 'approval', 'tranquility', 'competition', 'health', 'family', 'romance', 'food', 'indep', 'power', 'order', 'curiosity', 'serenity', 'honor', 'belonging', 'contact', 'savings', 'idealism', 'rest']
    # Label inventory; `distribution` below is a count vector indexed
    # parallel to this list.
    human_needs = ['status', 'approval', 'tranquility', 'competition', 'health', 'family', 'romance', 'food', 'indep', 'power', 'order', 'curiosity', 'serenity', 'honor', 'belonging', 'contact', 'savings', 'idealism', 'rest']
    #human_needs = ['status', 'approval', 'tranquility', 'competition', 'health', 'family', 'romance', 'food', 'indep', 'power', 'order', 'curiosity', 'serenity', 'honor', 'contact', 'savings', 'idealism', 'rest']
    #super_class = {'physiological':['food','rest'] , 'love':['love','belonging', 'social','family'], 'spiritual growth':['curiosity','idealism','independent','competition','calm'],'esteem':['power','honor','approval','competition','status'],'stability':['health','order','save_money','safety']}
  
    #human_needs=['[]']
    #human_needs =['physiological', 'love', 'spiritual growth', 'esteem', 'stability']
    count_line=0 
    count_story=0
    context=[]
    con=''
    classification=[]
    n=[]
    motivation={}
    line_num=''
    story_ids=[]
    ex_story_ids=[]
    w=''
    s=''
    temp_motivation=[]
    indicator = False
    distribution= [0] * len(human_needs)
    
    with open(data, newline='') as csvfile:
        lines= csv.reader(csvfile)
        
        for line in lines:
             # Track story transitions: bump count_story on the first row
             # and whenever line[0] (story id) changes.
             if count_story==0:
                    count_story=count_story+1
                    story_id=line[0]
                    ex_story_ids=line[0]
                    
             elif story_id!=line[0]:
                 ex_story_ids=story_id
                 story_id=line[0]
                 count_story=count_story+1
             else:
                 story_id=line[0]
                 count_story=count_story 

             # Strip the bracket/quote characters from the label field and
             # split it into a list of label strings.
             line[-1]=line[-1].replace("[", "")
             line[-1]=line[-1].replace("]", "")
             line[-1]=line[-1].replace('"', "")
             line[-1]=line[-1].split(",")
             # Only rows flagged 'yes' (motivation present) are aggregated.
             if line[-4]=='yes':
               if indicator ==False:
                     s = line[-5]
                     c = line[2]
                     con = line[4]
                     line_num=line[1]
                     indicator=True
               # First 'yes' row ever seen: initialize the accumulator state.
               if count_line==0:
                count_line=count_line+1
                s = line[-5]
                c = line[2]
                con = line[4]
                
                line_num=line[1]
                w=line[0]
                if con == '':
                       con = "No Context"
                       line_num=1
                line[-3]=line[-3].replace("[", "")
                line[-3]=line[-3].replace("]", "")
                line[-3]=line[-3].replace('"', "")
                line[-3]=line[-3].replace(",", "")
                #temp_motivation.append(line[-3])
                if s == line[-5]:
                  if c == line[2]:
                   #context.append(con)
                   distribution= [0] * len(human_needs)
                   
                   # NOTE(review): unlike every later branch, this one does
                   # NOT .strip() the label before the membership test, so
                   # labels with leading spaces (after split(",")) are
                   # silently dropped here — confirm whether intentional.
                   for i in range(len(line[-1])):
                      classification.append(str(line[-1][i]))
                      
                      if line[-1][i] in human_needs:                            
                             pos= human_needs.index(line[-1][i])
                             distribution[pos]=distribution[pos]+1
                  else: 
                      # Character changed on the very first row pair: flush
                      # the previous record and restart accumulation.
                      story_ids.append(w+'__sent'+str(line_num))
                      context.append(con)
                      #motivation[count]=temp_motivation
                      #temp_motivation=[]
                      # NOTE(review): `count` is not bound before this
                      # increment — this path raises UnboundLocalError if
                      # ever taken (`count` is first assigned far below).
                      count=count+1
                      input_sentence.append(s)
                      input_char.append(c)
                      
                      out.append(distribution)
                      s = line[-5]
                      c = line[2]
                      con = line[4]
                      line_num=line[1]
                      if con == '':
                         con = "No Context"
                         line_num=1
                      distribution= [0] * len(human_needs)
                      
                      for i in range(len(line[-1])):
                        if line[-1][i].strip() in human_needs:                      
                                pos= human_needs.index(line[-1][i].strip())
                                distribution[pos]=distribution[pos]+1          
               else:
                # Steady state: same sentence + same character -> keep
                # accumulating; any change -> flush and restart.
                if s == line[-5]:
                  if c == line[2]:
                    con = line[4]
                    line_num=line[1]
                    w=line[0]
                    if con == '':
                       con = "No Context"
                       line_num=1
                    line[-3]=line[-3].replace("[", "")
                    line[-3]=line[-3].replace("]", "")
                    line[-3]=line[-3].replace('"', "")
                    line[-3]=line[-3].replace(",", "")
                    temp_motivation.append(line[-3])    
                    

                    for i in range(len(line[-1])):
                        if line[-1][i].strip() in human_needs:                      
                                pos= human_needs.index(line[-1][i].strip())
                                distribution[pos]=distribution[pos]+1  
                              
                  else: 
                           # Same sentence, new character: flush record.
                           story_ids.append(w+'__sent'+str(line_num))
                           context.append(con)
                           input_sentence.append(s)
                           input_char.append(c)
                           out.append(distribution)
                           s = line[-5]
                           c = line[2]
                           con = line[4]
                           line_num=line[1]
                           w=line[0]
                           if con == '':
                                con = "No Context"
                           # NOTE(review): unlike the branches above, this
                           # resets line_num to 1 unconditionally (not only
                           # when con == '') — confirm whether intentional.
                           line_num=1
                           distribution= [0] * len(human_needs)
                           for i in range(len(line[-1])):
                              if line[-1][i].strip() in human_needs:                      
                                pos= human_needs.index(line[-1][i].strip())
                                distribution[pos]=distribution[pos]+1 
                else:
                           
                           # New sentence: flush record.
                           story_ids.append(w+'__sent'+str(line_num))
                           context.append(con)
                           input_sentence.append(s)
                           input_char.append(c)
                      
                           out.append(distribution)
                           s = line[-5]
                           c = line[2]
                           con = line[4]
                           line_num=line[1]
                           w=line[0]
                           if con == '':
                                con = "No Context"
                           # NOTE(review): unconditional reset, same caveat
                           # as the branch above.
                           line_num=1
                           distribution= [0] * len(human_needs)
                           for i in range(len(line[-1])):
                             if line[-1][i].strip() in human_needs:                      
                                pos= human_needs.index(line[-1][i].strip())
                                distribution[pos]=distribution[pos]+1                  
                     
             else:
                # Row flagged 'no': if we were mid-accumulation, flush the
                # pending record and reset the indicator.
                if indicator==True:
                    indicator=False
                    story_ids.append(w+'__sent'+str(line_num))
                    context.append(con)
                    n=1
                    input_sentence.append(s)
                    input_char.append(c)
                    out.append(distribution)
                    s = line[-5]
                    c = line[2]
                    con = line[4]
                    line_num=line[1]
                    w=line[0]
                    if con == '':
                       con = "No Context"
                       line_num=1
                    distribution= [0] * len(human_needs)
                    for i in range(len(line[-1])):
                        if line[-1][i].strip() in human_needs:                      
                                pos= human_needs.index(line[-1][i].strip())
                                distribution[pos]=distribution[pos]+1 
        # for/else: this runs after the csv loop finishes normally,
        # flushing the final accumulated record.
        else:
        # No more lines to be read from file
            story_ids.append(w+'__sent'+str(line_num))
            context.append(con)
            n=1
            input_sentence.append(s)
            input_char.append(c)
            out.append(distribution)
    
        
    # Lemmatize every aggregated sentence with spaCy; doc_list holds one
    # lemma list per sentence (parallel to input_sentence).
    nlp = spacy.load('en_core_web_sm')
    doc_list = []
    temp =[]
    for sentence in input_sentence:
        doc = nlp(sentence)
        for token in doc:
            temp.append(token.lemma_)
        doc_list.append(temp)
        temp =[]
        doc= ""

    
    
    ### The file that we create, human_need_list shows the human need for each sentence, the tru_list contains the final human need that got chosen randomly
    
    human_need_list = []
    # NOTE(review): hard-coded absolute Windows path — only runs on the
    # original author's machine.
    new_file = r'C:\Users\PC\Desktop\Coli neu\SWP\experiment.txt'
    count=0
    temp_list = []
    with open(new_file, 'w') as file:
        # Binarize distributions that contain a count of 2 or 3: those
        # labels become 1, lone 1-counts are zeroed out.
        for i in range(len(out)):
            if sum(out[i])!=0:
               if 2 in out[i] or 3 in out[i]:
                  for n, a in enumerate(out[i]):
                     if a==1:
                         out[i][n]=0
                     if a==2 or a==3:
                         out[i][n]=1
                  count=count+1
        #print(out)
        # For each distribution, collect every label tied for the maximum
        # (nonzero) count.
        temp_list = []
        for concepts in out:
            for i in range(len(concepts)):
                if concepts[i] == max(concepts) and concepts[i] != 0:
                    temp_list.append(human_needs[i])
            human_need_list.append(temp_list)
            temp_list = []
        #print(concept_list)
        
#         print(len(human_need_list)) 
#         print(len(out))
        # Break ties by picking one label uniformly at random; 'None' when
        # no label applied.
        tru_list = []
        for hn in human_need_list:
            if len(hn) != 0:
                i = random.choice(hn)
                tru_list.append(i)
            else:
                tru_list.append('None')
        #print(concept_list)
#         print(tru_list)
#         print(len(tru_list))
#         print(input_sentence)
        
        concept_list = matching_lists(doc_list)
        ### add here whatever you want to have in your file (tru_list, story ids ...)
        for i in range(len(out)):
            print(story_ids[i],'\t',input_sentence[i],'\t',out[i])
            file.write(str(story_ids[i]) +"\t" + str(input_sentence[i]) + "\t" + str(out[i]) + "\n") #### for neural model
            
            #print(story_ids[i],'\t',tru_list[i].replace('|',' '),'\t',input_sentence[i],'\t',concept_list[i])
            #file.write(str((story_ids[i] + "\t" + context[i].replace('|',' ') + "\t" + input_sentence[i] + "\t", concept_list[i]))) #### for training/test data
        
    return 

def main():
    """
    Entry point: parse an optional CSV path from the command line and run
    read_file on it.

    The positional argument is optional (nargs='?'); when omitted, the
    previously hard-coded corpus path is used as the default, so existing
    invocations with no arguments behave exactly as before.
    """
    parser = argparse.ArgumentParser(
        description="Read the human-needs story annotation CSV and emit "
                    "per-sentence label distributions.")
    parser.add_argument(
        "csvfile",
        nargs='?',
        default=r'C:\Users\PC\Desktop\Coli neu\SWP\Multi-Hop-Knowledge-Paths-Human-Needs-master\csv_version\dev\motiv\allcharlinepairs.csv',
        help=".csv file containing the annotated story lines "
             "(defaults to the original hard-coded path)")
    args = parser.parse_args()
    read_file(args.csvfile)


if __name__ == '__main__':
    main()