"""Explore spaCy linguistic features on STS-Benchmark sentences.

Read more here: https://spacy.io/usage/linguistic-features
"""

import spacy

from STSB_Dataset import STSB_Dataset


def print_nlp(text, mode="pos"):
    """Print one linguistic attribute for every token in *text*.

    Parameters
    ----------
    text : iterable of spacy tokens (e.g. a ``Doc``)
        The processed document whose tokens are inspected.
    mode : str
        Which attribute to print: "pos", "lemma", "tag" or "dep".

    Raises
    ------
    ValueError
        If *mode* is not one of the recognized attribute names.
    """
    # Dispatch table mapping each mode to the spaCy token attribute it
    # prints.  The original if-chain left `nlp_list` unbound for any
    # unknown mode, raising a confusing NameError; now we fail fast
    # with an explicit ValueError instead.
    attributes = {
        "pos": "pos_",
        "lemma": "lemma_",
        "tag": "tag_",
        "dep": "dep_",
    }
    try:
        attr = attributes[mode]
    except KeyError:
        raise ValueError(
            f"Unknown mode {mode!r}; expected one of {sorted(attributes)}"
        ) from None
    print(" ".join(getattr(token, attr) for token in text))


def main():
    """Load the STS-B test split, run spaCy over it, and demo one doc."""
    nlp = spacy.load("en_core_web_sm")
    dataset = STSB_Dataset('data/stsbenchmark/sts-test.csv')

    # spaCy representation of both sentence columns.
    # NOTE(review): `_examples` is a private attribute of STSB_Dataset;
    # a public accessor would be preferable — kept for compatibility.
    sentences1 = [nlp(s['sentence1']) for s in dataset._examples]
    # NOTE(review): sentences2 is computed but never used below; kept to
    # preserve the original behavior (and any caching side effects).
    sentences2 = [nlp(s['sentence2']) for s in dataset._examples]

    doc = sentences1[1]

    print("Part of speech:")
    print_nlp(doc)
    print("\nLemmas:")
    print_nlp(doc, "lemma")
    print("\nTags (related to part of speech):")
    print_nlp(doc, "tag")
    print("\nDependency role:")
    print_nlp(doc, "dep")
    print("\nDependency head/children:")
    for token in doc:
        print(f"Token: {token.text}, Head: {token.head.text}, Children: {[child for child in token.children]}")


if __name__ == "__main__":
    main()