Commit efd1bc96 authored by kuehner's avatar kuehner
Browse files

Upload New File

parent a1f65927
Loading
Loading
Loading
Loading

read_humans.py

0 → 100644
+345 −0
Original line number Diff line number Diff line
#!/usr/bin/env python
# coding: utf-8

# In[14]:


#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 13 13:34:43 2021
"""
#python script to read the "Modeling Naive Psychology of Characters in Simple Commonsense Stories Human needs"
#!/usr/bin/env python3
import glob
import os
import subprocess
import sys
import argparse
import re
import csv
import json
import gzip
import random
import operator
import collections
import itertools
from collections import defaultdict
from ontology_create import onto_dict as onto_dict
import spacy


###Added matching_list & stop_word_list to create the concept list

# English stop words (NLTK-style list) excluded from ConceptNet concept
# matching in matching_lists() below, so that function words never count
# as concepts even when they appear as keys in onto_dict.
stop_word_list = ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't"]
def matching_lists(sentence_list):
    """
    Match words from tokenized sentences against ConceptNet concepts.

    For every sentence (a list of word/lemma strings), collect the words
    that appear as keys in ``onto_dict`` (the ConceptNet ontology mapping
    imported from ``ontology_create``) and are not English stop words.

    Parameters
    ----------
    sentence_list : list of list of str
        Tokenized sentences (e.g. lemma lists produced by spaCy).

    Returns
    -------
    matches : list of list of str
        One inner list per input sentence, containing that sentence's
        words that match ConceptNet concepts. Inner lists may be empty.
    """
    # Hoist the stop words into a set once: `word in stop_word_list` was an
    # O(len(list)) scan per token; set membership is O(1).
    stop_words = set(stop_word_list)
    # `word in onto_dict` replaces the unidiomatic `word in onto_dict.keys()`.
    return [
        [word for word in sentence
         if word in onto_dict and word not in stop_words]
        for sentence in sentence_list
    ]


def read_file(data):
    """
    Read the human-needs story annotation CSV, aggregate label counts,
    and write one tab-separated line per aggregated sentence to a
    hard-coded output file (experiment.txt).

    Column layout inferred from usage — TODO confirm against the corpus:
      line[0]  story id, line[1] sentence/line number, line[2] character,
      line[4]  context, line[-5] sentence text, line[-4] 'yes'/'no'
      motivation flag, line[-3] motivation text, line[-1] bracketed,
      comma-separated human-need labels.

    Rows are processed as a state machine keyed on (sentence, character):
    consecutive rows for the same sentence/character accumulate their
    labels into `distribution` (counts aligned with `human_needs`); when
    the sentence or character changes, the accumulated record is flushed
    into the parallel lists story_ids / context / input_sentence /
    input_char / out, and accumulation restarts from the current row.

    After reading, sentences are lemmatized with spaCy, distributions
    containing a 2 or 3 are binarized, and story_id / sentence /
    distribution triples are printed and written to the output file.

    Returns None (results are emitted via print and the file write).
    """
    graph = []
    clean_graph=[]
    n=0
    a=0
    out=[]
    input_sentence=[]
    input_char=[]
    tags=[]
    tag=''
    flag=False
    #human_needs=["curiosity","serenity","idealism","independence","competition","honor","approval","power","status","romance","belonging","family","social contract","health","savings","order","safety","food","rest","none"]
    #human_needs=['status', 'approval', 'tranquility', 'competition', 'health', 'family', 'romance', 'food', 'indep', 'power', 'order', 'curiosity', 'serenity', 'honor', 'belonging', 'contact', 'savings', 'idealism', 'rest']
    # Label inventory; `distribution` below is a count vector indexed
    # parallel to this list.
    human_needs = ['status', 'approval', 'tranquility', 'competition', 'health', 'family', 'romance', 'food', 'indep', 'power', 'order', 'curiosity', 'serenity', 'honor', 'belonging', 'contact', 'savings', 'idealism', 'rest']
    #human_needs = ['status', 'approval', 'tranquility', 'competition', 'health', 'family', 'romance', 'food', 'indep', 'power', 'order', 'curiosity', 'serenity', 'honor', 'contact', 'savings', 'idealism', 'rest']
    #super_class = {'physiological':['food','rest'] , 'love':['love','belonging', 'social','family'], 'spiritual growth':['curiosity','idealism','independent','competition','calm'],'esteem':['power','honor','approval','competition','status'],'stability':['health','order','save_money','safety']}
  
    #human_needs=['[]']
    #human_needs =['physiological', 'love', 'spiritual growth', 'esteem', 'stability']
    count_line=0 
    count_story=0
    context=[]
    con=''
    classification=[]
    n=[]
    motivation={}
    line_num=''
    story_ids=[]
    ex_story_ids=[]
    w=''
    s=''
    temp_motivation=[]
    indicator = False
    distribution= [0] * len(human_needs)
    
    with open(data, newline='') as csvfile:
        lines= csv.reader(csvfile)
        
        for line in lines:
             # Track story transitions: bump count_story on the first row
             # and whenever line[0] (story id) changes.
             if count_story==0:
                    count_story=count_story+1
                    story_id=line[0]
                    ex_story_ids=line[0]
                    
             elif story_id!=line[0]:
                 ex_story_ids=story_id
                 story_id=line[0]
                 count_story=count_story+1
             else:
                 story_id=line[0]
                 count_story=count_story 

             # Strip the bracket/quote characters from the label field and
             # split it into a list of label strings.
             line[-1]=line[-1].replace("[", "")
             line[-1]=line[-1].replace("]", "")
             line[-1]=line[-1].replace('"', "")
             line[-1]=line[-1].split(",")
             # Only rows flagged 'yes' (motivation present) are aggregated.
             if line[-4]=='yes':
               if indicator ==False:
                     s = line[-5]
                     c = line[2]
                     con = line[4]
                     line_num=line[1]
                     indicator=True
               # First 'yes' row ever seen: initialize the accumulator state.
               if count_line==0:
                count_line=count_line+1
                s = line[-5]
                c = line[2]
                con = line[4]
                
                line_num=line[1]
                w=line[0]
                if con == '':
                       con = "No Context"
                       line_num=1
                line[-3]=line[-3].replace("[", "")
                line[-3]=line[-3].replace("]", "")
                line[-3]=line[-3].replace('"', "")
                line[-3]=line[-3].replace(",", "")
                #temp_motivation.append(line[-3])
                if s == line[-5]:
                  if c == line[2]:
                   #context.append(con)
                   distribution= [0] * len(human_needs)
                   
                   # NOTE(review): unlike every later branch, this one does
                   # NOT .strip() the label before the membership test, so
                   # labels with leading spaces (after split(",")) are
                   # silently dropped here — confirm whether intentional.
                   for i in range(len(line[-1])):
                      classification.append(str(line[-1][i]))
                      
                      if line[-1][i] in human_needs:                            
                             pos= human_needs.index(line[-1][i])
                             distribution[pos]=distribution[pos]+1
                  else: 
                      # Character changed on the very first row pair: flush
                      # the previous record and restart accumulation.
                      story_ids.append(w+'__sent'+str(line_num))
                      context.append(con)
                      #motivation[count]=temp_motivation
                      #temp_motivation=[]
                      # NOTE(review): `count` is not bound before this
                      # increment — this path raises UnboundLocalError if
                      # ever taken (`count` is first assigned far below).
                      count=count+1
                      input_sentence.append(s)
                      input_char.append(c)
                      
                      out.append(distribution)
                      s = line[-5]
                      c = line[2]
                      con = line[4]
                      line_num=line[1]
                      if con == '':
                         con = "No Context"
                         line_num=1
                      distribution= [0] * len(human_needs)
                      
                      for i in range(len(line[-1])):
                        if line[-1][i].strip() in human_needs:                      
                                pos= human_needs.index(line[-1][i].strip())
                                distribution[pos]=distribution[pos]+1          
               else:
                # Steady state: same sentence + same character -> keep
                # accumulating; any change -> flush and restart.
                if s == line[-5]:
                  if c == line[2]:
                    con = line[4]
                    line_num=line[1]
                    w=line[0]
                    if con == '':
                       con = "No Context"
                       line_num=1
                    line[-3]=line[-3].replace("[", "")
                    line[-3]=line[-3].replace("]", "")
                    line[-3]=line[-3].replace('"', "")
                    line[-3]=line[-3].replace(",", "")
                    temp_motivation.append(line[-3])    
                    

                    for i in range(len(line[-1])):
                        if line[-1][i].strip() in human_needs:                      
                                pos= human_needs.index(line[-1][i].strip())
                                distribution[pos]=distribution[pos]+1  
                              
                  else: 
                           # Same sentence, new character: flush record.
                           story_ids.append(w+'__sent'+str(line_num))
                           context.append(con)
                           input_sentence.append(s)
                           input_char.append(c)
                           out.append(distribution)
                           s = line[-5]
                           c = line[2]
                           con = line[4]
                           line_num=line[1]
                           w=line[0]
                           if con == '':
                                con = "No Context"
                           # NOTE(review): unlike the branches above, this
                           # resets line_num to 1 unconditionally (not only
                           # when con == '') — confirm whether intentional.
                           line_num=1
                           distribution= [0] * len(human_needs)
                           for i in range(len(line[-1])):
                              if line[-1][i].strip() in human_needs:                      
                                pos= human_needs.index(line[-1][i].strip())
                                distribution[pos]=distribution[pos]+1 
                else:
                           
                           # New sentence: flush record.
                           story_ids.append(w+'__sent'+str(line_num))
                           context.append(con)
                           input_sentence.append(s)
                           input_char.append(c)
                      
                           out.append(distribution)
                           s = line[-5]
                           c = line[2]
                           con = line[4]
                           line_num=line[1]
                           w=line[0]
                           if con == '':
                                con = "No Context"
                           # NOTE(review): unconditional reset, same caveat
                           # as the branch above.
                           line_num=1
                           distribution= [0] * len(human_needs)
                           for i in range(len(line[-1])):
                             if line[-1][i].strip() in human_needs:                      
                                pos= human_needs.index(line[-1][i].strip())
                                distribution[pos]=distribution[pos]+1                  
                     
             else:
                # Row flagged 'no': if we were mid-accumulation, flush the
                # pending record and reset the indicator.
                if indicator==True:
                    indicator=False
                    story_ids.append(w+'__sent'+str(line_num))
                    context.append(con)
                    n=1
                    input_sentence.append(s)
                    input_char.append(c)
                    out.append(distribution)
                    s = line[-5]
                    c = line[2]
                    con = line[4]
                    line_num=line[1]
                    w=line[0]
                    if con == '':
                       con = "No Context"
                       line_num=1
                    distribution= [0] * len(human_needs)
                    for i in range(len(line[-1])):
                        if line[-1][i].strip() in human_needs:                      
                                pos= human_needs.index(line[-1][i].strip())
                                distribution[pos]=distribution[pos]+1 
        # for/else: this runs after the csv loop finishes normally,
        # flushing the final accumulated record.
        else:
        # No more lines to be read from file
            story_ids.append(w+'__sent'+str(line_num))
            context.append(con)
            n=1
            input_sentence.append(s)
            input_char.append(c)
            out.append(distribution)
    
        
    # Lemmatize every aggregated sentence with spaCy; doc_list holds one
    # lemma list per sentence (parallel to input_sentence).
    nlp = spacy.load('en_core_web_sm')
    doc_list = []
    temp =[]
    for sentence in input_sentence:
        doc = nlp(sentence)
        for token in doc:
            temp.append(token.lemma_)
        doc_list.append(temp)
        temp =[]
        doc= ""

    
    
    ### The file that we create, human_need_list shows the human need for each sentence, the tru_list contains the final human need that got chosen randomly
    
    human_need_list = []
    # NOTE(review): hard-coded absolute Windows path — only runs on the
    # original author's machine.
    new_file = r'C:\Users\PC\Desktop\Coli neu\SWP\experiment.txt'
    count=0
    temp_list = []
    with open(new_file, 'w') as file:
        # Binarize distributions that contain a count of 2 or 3: those
        # labels become 1, lone 1-counts are zeroed out.
        for i in range(len(out)):
            if sum(out[i])!=0:
               if 2 in out[i] or 3 in out[i]:
                  for n, a in enumerate(out[i]):
                     if a==1:
                         out[i][n]=0
                     if a==2 or a==3:
                         out[i][n]=1
                  count=count+1
        #print(out)
        # For each distribution, collect every label tied for the maximum
        # (nonzero) count.
        temp_list = []
        for concepts in out:
            for i in range(len(concepts)):
                if concepts[i] == max(concepts) and concepts[i] != 0:
                    temp_list.append(human_needs[i])
            human_need_list.append(temp_list)
            temp_list = []
        #print(concept_list)
        
#         print(len(human_need_list)) 
#         print(len(out))
        # Break ties by picking one label uniformly at random; 'None' when
        # no label applied.
        tru_list = []
        for hn in human_need_list:
            if len(hn) != 0:
                i = random.choice(hn)
                tru_list.append(i)
            else:
                tru_list.append('None')
        #print(concept_list)
#         print(tru_list)
#         print(len(tru_list))
#         print(input_sentence)
        
        concept_list = matching_lists(doc_list)
        ### add here whatever you want to have in your file (tru_list, story ids ...)
        for i in range(len(out)):
            print(story_ids[i],'\t',input_sentence[i],'\t',out[i])
            file.write(str(story_ids[i]) +"\t" + str(input_sentence[i]) + "\t" + str(out[i]) + "\n") #### for neural model
            
            #print(story_ids[i],'\t',tru_list[i].replace('|',' '),'\t',input_sentence[i],'\t',concept_list[i])
            #file.write(str((story_ids[i] + "\t" + context[i].replace('|',' ') + "\t" + input_sentence[i] + "\t", concept_list[i]))) #### for training/test data
        
    return 

def main():
    """
    Entry point: parse an optional CSV path from the command line and run
    read_file on it.

    The positional argument is optional (nargs='?'); when omitted, the
    previously hard-coded corpus path is used as the default, so existing
    invocations with no arguments behave exactly as before.
    """
    parser = argparse.ArgumentParser(
        description="Read the human-needs story annotation CSV and emit "
                    "per-sentence label distributions.")
    parser.add_argument(
        "csvfile",
        nargs='?',
        default=r'C:\Users\PC\Desktop\Coli neu\SWP\Multi-Hop-Knowledge-Paths-Human-Needs-master\csv_version\dev\motiv\allcharlinepairs.csv',
        help=".csv file containing the annotated story lines "
             "(defaults to the original hard-coded path)")
    args = parser.parse_args()
    read_file(args.csvfile)


if __name__ == '__main__':
    main()