Loading allzweckmesser/scanner.py +8 −30 Original line number Diff line number Diff line Loading @@ -9,13 +9,6 @@ from .db import FormAnalysis from .model import Reading, Syllable, Token, Verse, Phenomenon from .wordlist import WordList verses = [ 'nunc dum tibi lubet licetque pota perde rem', 'antehac est habitus parcus nec magis continens', "clamavit moriens lingua: 'Corinna, vale!'", 'an, quod ubique, tuum est? tua sunt Heliconia Tempe?', ] CLITICS = ['que', 'qve', 'ue', 've', 'ne'] Loading Loading @@ -177,9 +170,9 @@ def get_syllables_for_token(token: Token): syllables = [] if token.accented: regex = ( r'((?:ua|ue|ae|oe|au|eu|yi|[aeiouy])[_^]?)' r'((?:ua|ui|ue|ae|oe|au|eu|yi|[aeiouy])[_^]?)' if token.text[0].isupper() else r'((?:ua|ue|ae|oe|au|[aeiouy])[_^]?)' else r'((?:ua|ui|ue|ae|oe|au|[aeiouy])[_^]?)' ) accented = (token.accented + token.clitic if token.clitic Loading Loading @@ -228,15 +221,13 @@ def get_syllables_for_token(token: Token): syllable_length=syll_vowel_length) syllables.append(syll) else: syllables = None syllables = [] return syllables def get_syllables(reading): for token in reading.tokens: token.syllables = get_syllables_for_token(token) # TODO: Add positional_lengthening phenomena and adjust syllable # lengths accordingly. return reading Loading Loading @@ -275,7 +266,6 @@ def parse_verse(verse): """Annotates syllable lengths based on positional_lengthening and muta cum liquida """ positional_lengthening(verse) muta_cum_liquida(verse) Loading Loading @@ -328,23 +318,8 @@ def parse_verse(verse): else: new_readings.append(copy.deepcopy(reading)) return new_readings #for reading in verse.readings: #for token in reading.tokens: #for syllable in token.syllables: #if "muta cum liquida" in syllable.phenomena: #old_reading = copy.deepcopy(reading) #all_readings_parsed = False #if syllable_length == 1: #elif syllable_length == 2: verse.readings = new_readings return verse class Scanner: Loading @@ -353,6 +328,8 @@ class Scanner: self.word_list = WordList() def scan_verses(self, plain_verses: List[str]): import pdb pdb.set_trace() base_readings = [Reading(tokens=tokenize(v)) for v in plain_verses] verses = [ Verse(verse=v, readings=lemmatize(self.word_list, br)) Loading @@ -361,4 +338,5 @@ class Scanner: for verse in verses: for reading in verse.readings: get_syllables(reading) parse_verse(verse) return verses Loading
allzweckmesser/scanner.py +8 −30 Original line number Diff line number Diff line Loading @@ -9,13 +9,6 @@ from .db import FormAnalysis from .model import Reading, Syllable, Token, Verse, Phenomenon from .wordlist import WordList verses = [ 'nunc dum tibi lubet licetque pota perde rem', 'antehac est habitus parcus nec magis continens', "clamavit moriens lingua: 'Corinna, vale!'", 'an, quod ubique, tuum est? tua sunt Heliconia Tempe?', ] CLITICS = ['que', 'qve', 'ue', 've', 'ne'] Loading Loading @@ -177,9 +170,9 @@ def get_syllables_for_token(token: Token): syllables = [] if token.accented: regex = ( r'((?:ua|ue|ae|oe|au|eu|yi|[aeiouy])[_^]?)' r'((?:ua|ui|ue|ae|oe|au|eu|yi|[aeiouy])[_^]?)' if token.text[0].isupper() else r'((?:ua|ue|ae|oe|au|[aeiouy])[_^]?)' else r'((?:ua|ui|ue|ae|oe|au|[aeiouy])[_^]?)' ) accented = (token.accented + token.clitic if token.clitic Loading Loading @@ -228,15 +221,13 @@ def get_syllables_for_token(token: Token): syllable_length=syll_vowel_length) syllables.append(syll) else: syllables = None syllables = [] return syllables def get_syllables(reading): for token in reading.tokens: token.syllables = get_syllables_for_token(token) # TODO: Add positional_lengthening phenomena and adjust syllable # lengths accordingly. return reading Loading Loading @@ -275,7 +266,6 @@ def parse_verse(verse): """Annotates syllable lengths based on positional_lengthening and muta cum liquida """ positional_lengthening(verse) muta_cum_liquida(verse) Loading Loading @@ -328,23 +318,8 @@ def parse_verse(verse): else: new_readings.append(copy.deepcopy(reading)) return new_readings #for reading in verse.readings: #for token in reading.tokens: #for syllable in token.syllables: #if "muta cum liquida" in syllable.phenomena: #old_reading = copy.deepcopy(reading) #all_readings_parsed = False #if syllable_length == 1: #elif syllable_length == 2: verse.readings = new_readings return verse class Scanner: Loading @@ -353,6 +328,8 @@ class Scanner: self.word_list = WordList() def scan_verses(self, plain_verses: List[str]): import pdb pdb.set_trace() base_readings = [Reading(tokens=tokenize(v)) for v in plain_verses] verses = [ Verse(verse=v, readings=lemmatize(self.word_list, br)) Loading @@ -361,4 +338,5 @@ class Scanner: for verse in verses: for reading in verse.readings: get_syllables(reading) parse_verse(verse) return verses