Loading process_srl.py +3 −2 Original line number Diff line number Diff line Loading @@ -56,10 +56,12 @@ parser.add_argument("-c", "--combine", action="store_true", default=False, help= parser.add_argument("-a", "--arg", default=None, help="analyze args or not") parser.add_argument("-n", "--n", type=int, default=50, help="nr of top verbs to show") parser.add_argument("-mf", "--min_freq", type=int, default=5, help="min verb frequency to eliminate rarest verbs") parser.add_argument("-l", "--lemmatize", action="store_true", default=False, help="whether to (re)do lemmatization") args = parser.parse_args() MIN_FREQ = args.min_freq N = args.n ARG = args.arg LEMMATIZE = args.lemmatize # load files sent = pd.read_csv('data/sent_df.csv', names=['sent_id','bias','doc_id','s']) Loading Loading @@ -92,8 +94,7 @@ pd.set_option('display.width', 1000) def lemma(x): return nlp(x)[0].lemma_ lemmatize = False if lemmatize: if LEMMATIZE: nlp = spacy.load('en') df.verb = df.verb.apply(lemma) #['lemma'] df.to_csv('frequent_verbs/lemmatized.csv') Loading Loading
process_srl.py +3 −2 Original line number Diff line number Diff line Loading @@ -56,10 +56,12 @@ parser.add_argument("-c", "--combine", action="store_true", default=False, help= parser.add_argument("-a", "--arg", default=None, help="analyze args or not") parser.add_argument("-n", "--n", type=int, default=50, help="nr of top verbs to show") parser.add_argument("-mf", "--min_freq", type=int, default=5, help="min verb frequency to eliminate rarest verbs") parser.add_argument("-l", "--lemmatize", action="store_true", default=False, help="whether to (re)do lemmatization") args = parser.parse_args() MIN_FREQ = args.min_freq N = args.n ARG = args.arg LEMMATIZE = args.lemmatize # load files sent = pd.read_csv('data/sent_df.csv', names=['sent_id','bias','doc_id','s']) Loading Loading @@ -92,8 +94,7 @@ pd.set_option('display.width', 1000) def lemma(x): return nlp(x)[0].lemma_ lemmatize = False if lemmatize: if LEMMATIZE: nlp = spacy.load('en') df.verb = df.verb.apply(lemma) #['lemma'] df.to_csv('frequent_verbs/lemmatized.csv') Loading