Commit be509f11 authored by EstherMaria
Browse files
parents 12a0179b 050a529b
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -74,9 +74,10 @@ for i, r in data.iterrows():
    tagged = pd.DataFrame(zip(*tagged), columns=['w', 'n'] + verbs)
    persons = tagged[tagged.n.str.endswith('PER')]
    names = persons[persons.w.apply(crude_ner) == True].copy()
    names.w = names.w.str.strip('')
    names.w = names.w.str.strip('\ufeff')
    names.w = names.w.str.strip('‘s')
    #names.w = names.w.str.strip('—')
    names.w = names.w.str.replace('', '')
    names.w = names.w.str.replace('\ufeff', '')
    names.w = names.w.str.replace('‘s', '')
    #print('names', names)

    verbs = names.set_index(['w']).iloc[:,1:]