def positional_lengthening(verse):
    """Mark syllables lengthened by position.

    A vowel followed by a cluster of two or more consonants -- possibly
    spanning a word boundary (the ``,? ?`` part of the pattern) -- or by a
    double consonant (x, z) is long by position.  Each affected syllable
    gets a 'positional lengthening' Phenomenon recording the matched
    consonant cluster.

    :param verse: Verse whose readings' syllables are annotated in place.
    """
    pl_regex = re.compile(
        r'[aeiouv](((([bcdfgjklmnprstvwxz]|(qu)),?\s?){2,})|[xz])')
    # NOTE(review): a redundant re.search() pre-check was dropped;
    # finditer simply yields nothing when there is no match.
    for match in re.finditer(pl_regex, verse.text):
        for reading in verse.readings:
            for token in reading.tokens:
                for syllable in token.syllables:
                    # The syllable containing the vowel at match.start()
                    # is the one lengthened by the following cluster.
                    if syllable.span[0] <= match.start() < syllable.span[1]:
                        syllable.phenomena['positional lengthening'] = (
                            Phenomenon(chars=match.group(1)))


def parse_verse(verse):
    """Annotate syllable lengths based on positional lengthening and
    muta cum liquida.

    For every reading an abstract length pattern is built with one slot
    per syllable: '0'/'1'/'2' for a known length, '{}' for an ambiguous
    muta-cum-liquida syllable.  Every ambiguous slot is then expanded
    into both a short ('1') and a long ('2') variant, multiplying the
    reading accordingly.

    :param verse: Verse to scan; its readings are annotated in place.
    :return: new list of Reading objects, one per length combination.
    """
    positional_lengthening(verse)
    muta_cum_liquida(verse)
    new_readings = list()
    for reading in verse.readings:
        syllables = [syllable
                     for token in reading.tokens
                     for syllable in token.syllables]
        abstract = str()
        mcl_count = 0
        for syllable in syllables:
            if 'muta cum liquida' in syllable.phenomena:
                # Ambiguous: may scan short or long; filled in below.
                abstract += '{}'
                mcl_count += 1
            elif 'positional lengthening' in syllable.phenomena:
                abstract += '2'
            elif syllable.syllable_length == 0:
                abstract += '0'
            elif syllable.syllable_length == 1:
                abstract += '1'
            elif syllable.syllable_length == 2:
                abstract += '2'
        if mcl_count > 0:
            # Every short/long combination for the ambiguous slots.
            combinations = list(product(['1', '2'], repeat=mcl_count))
            new_abstracts = [abstract.format(*combi)
                             for combi in combinations]
            # BUG FIX: product(..., repeat=mcl_count) yields
            # 2 ** mcl_count combinations, not mcl_count * 2; the old
            # factor under-produced copies and raised IndexError for
            # mcl_count >= 3.
            reading_copies = multiply_readings([reading], 2 ** mcl_count)
            for blueprint, new_reading in zip(new_abstracts, reading_copies):
                syll_id = 0
                for token in new_reading.tokens:
                    for s in token.syllables:
                        if blueprint[syll_id] == '1':
                            s.syllable_length = 1
                            # A short muta-cum-liquida scan overrules a
                            # simultaneous positional lengthening.
                            if ('positional lengthening' in s.phenomena
                                    and 'muta cum liquida' in s.phenomena):
                                s.phenomena['positional lengthening'] \
                                    .overruled_by = 'muta cum liquida'
                        elif blueprint[syll_id] == '2':
                            s.syllable_length = 2
                        syll_id += 1
                new_readings.append(copy.deepcopy(new_reading))
        else:
            # No ambiguity: keep the reading as-is (deep copy so later
            # mutation of verse.readings cannot alias the result).
            new_readings.append(copy.deepcopy(reading))
    return new_readings
#print(json.dumps(json.loads(verse_model.to_json()), indent=4, sort_keys=True)) pass if __name__ == "__main__": test_import_json() Loading
allzweckmesser/model.py +4 −3 Original line number Diff line number Diff line Loading @@ -62,7 +62,7 @@ class Syllable: def __init__(self, syllable: str, span: List[int], idx: int, syllable_length: int, vowel_length: int, phenomena: dict = dict()): phenomena: dict = None): if len(syllable) != span[1] - span[0]: raise ValueError('Syllable length does not match syllable span.') else: Loading @@ -71,7 +71,7 @@ class Syllable: self.id = idx self.syllable_length = syllable_length self.vowel_length = vowel_length self.phenomena = phenomena self.phenomena = phenomena or dict() @classmethod def from_json(cls, json_file): Loading Loading @@ -437,6 +437,7 @@ class Verse: return json.dumps(self.to_dict()) def __str__(self): s = 'Verse: {verse}\n{reading_num} Readings:\n{readings}' readings_str = '\n'.join(str(r) for r in self.readings) Loading
def positional_lengthening(verse):
    """Mark syllables lengthened by position.

    A vowel followed by a cluster of two or more consonants -- possibly
    spanning a word boundary (the ``,? ?`` part of the pattern) -- or by a
    double consonant (x, z) is long by position.  Each affected syllable
    gets a 'positional lengthening' Phenomenon recording the matched
    consonant cluster.

    :param verse: Verse whose readings' syllables are annotated in place.
    """
    pl_regex = re.compile(
        r'[aeiouv](((([bcdfgjklmnprstvwxz]|(qu)),?\s?){2,})|[xz])')
    # NOTE(review): a redundant re.search() pre-check was dropped;
    # finditer simply yields nothing when there is no match.
    for match in re.finditer(pl_regex, verse.text):
        for reading in verse.readings:
            for token in reading.tokens:
                for syllable in token.syllables:
                    # The syllable containing the vowel at match.start()
                    # is the one lengthened by the following cluster.
                    if syllable.span[0] <= match.start() < syllable.span[1]:
                        syllable.phenomena['positional lengthening'] = (
                            Phenomenon(chars=match.group(1)))


def parse_verse(verse):
    """Annotate syllable lengths based on positional lengthening and
    muta cum liquida.

    For every reading an abstract length pattern is built with one slot
    per syllable: '0'/'1'/'2' for a known length, '{}' for an ambiguous
    muta-cum-liquida syllable.  Every ambiguous slot is then expanded
    into both a short ('1') and a long ('2') variant, multiplying the
    reading accordingly.

    :param verse: Verse to scan; its readings are annotated in place.
    :return: new list of Reading objects, one per length combination.
    """
    positional_lengthening(verse)
    muta_cum_liquida(verse)
    new_readings = list()
    for reading in verse.readings:
        syllables = [syllable
                     for token in reading.tokens
                     for syllable in token.syllables]
        abstract = str()
        mcl_count = 0
        for syllable in syllables:
            if 'muta cum liquida' in syllable.phenomena:
                # Ambiguous: may scan short or long; filled in below.
                abstract += '{}'
                mcl_count += 1
            elif 'positional lengthening' in syllable.phenomena:
                abstract += '2'
            elif syllable.syllable_length == 0:
                abstract += '0'
            elif syllable.syllable_length == 1:
                abstract += '1'
            elif syllable.syllable_length == 2:
                abstract += '2'
        if mcl_count > 0:
            # Every short/long combination for the ambiguous slots.
            combinations = list(product(['1', '2'], repeat=mcl_count))
            new_abstracts = [abstract.format(*combi)
                             for combi in combinations]
            # BUG FIX: product(..., repeat=mcl_count) yields
            # 2 ** mcl_count combinations, not mcl_count * 2; the old
            # factor under-produced copies and raised IndexError for
            # mcl_count >= 3.
            reading_copies = multiply_readings([reading], 2 ** mcl_count)
            for blueprint, new_reading in zip(new_abstracts, reading_copies):
                syll_id = 0
                for token in new_reading.tokens:
                    for s in token.syllables:
                        if blueprint[syll_id] == '1':
                            s.syllable_length = 1
                            # A short muta-cum-liquida scan overrules a
                            # simultaneous positional lengthening.
                            if ('positional lengthening' in s.phenomena
                                    and 'muta cum liquida' in s.phenomena):
                                s.phenomena['positional lengthening'] \
                                    .overruled_by = 'muta cum liquida'
                        elif blueprint[syll_id] == '2':
                            s.syllable_length = 2
                        syll_id += 1
                new_readings.append(copy.deepcopy(new_reading))
        else:
            # No ambiguity: keep the reading as-is (deep copy so later
            # mutation of verse.readings cannot alias the result).
            new_readings.append(copy.deepcopy(reading))
    return new_readings
tests/test_model.py +5 −3 Original line number Diff line number Diff line Loading @@ -21,10 +21,12 @@ def test_import_json(): assert verse_list_from_model == verse_list_from_json azm.scanner.muta_cum_liquida(verse_models[0]) for verse in verse_models: parsed_verses = azm.scanner.parse_verse(verse) for verse_model in verse_models[:1]: print(json.dumps(json.loads(verse_model.to_json()), indent=4, sort_keys=True)) for verse_model in parsed_verses: #print(json.dumps(json.loads(verse_model.to_json()), indent=4, sort_keys=True)) pass if __name__ == "__main__": test_import_json()