Commit 6adef26d authored by vdberg's avatar vdberg
Browse files

fix

parent 050a529b
Loading
Loading
Loading
Loading
+8 −4
Original line number Diff line number Diff line
@@ -43,6 +43,13 @@ def ner(t):

### load sent_df

def hardcoded_clean(names):
    """Strip a fixed list of junk substrings from the ``w`` column of *names*.

    Every pattern is removed as a literal substring (``regex=False``), so the
    ``.`` in ``'2.'`` only ever matches a real full stop, whatever default the
    installed pandas version uses for ``Series.str.replace``.

    Args:
        names: DataFrame with a string column ``w``. The column is
            overwritten on the object that is passed in.

    Returns:
        The same ``names`` object, with the cleaned ``w`` column.
    """
    junk = (
        '\ufeff',  # U+FEFF, the byte-order mark
        # NOTE(review): this pattern is empty as written, so removing it is a
        # no-op. It may originally have held an invisible character that got
        # lost; confirm against the data and restore it if so.
        '',
        '‘s',
        '2.',
    )
    cleaned = names.w
    for token in junk:
        if not token:
            # Replacing the empty string cannot change any value; skip it.
            continue
        cleaned = cleaned.str.replace(token, '', regex=False)
    names.w = cleaned
    return names

# Read the sentence table, then keep only the rows for which crude_ner,
# applied to the `s` column, yields True.
sent = pd.read_csv('data/sent_df.csv')
sent = sent.loc[sent.s.apply(crude_ner).eq(True)]
#print(max(sent.index))
@@ -75,11 +82,8 @@ for i, r in data.iterrows():
    persons = tagged[tagged.n.str.endswith('PER')]
    names = persons[persons.w.apply(crude_ner) == True].copy()
    #names.w = names.w.str.strip('—')
    names.w = names.w.str.replace('', '')
    names.w = names.w.str.replace('\ufeff', '')
    names.w = names.w.str.replace('‘s', '')
    #print('names', names)

    names = hardcoded_clean(names)
    verbs = names.set_index(['w']).iloc[:,1:]
    if not verbs.empty:
        verbs = verbs.copy().stack().reset_index(level=1, name='arg').rename(columns={'level_1':'verb'})