Module gender_render.parse_templates
Parser functions for gender*render templates.
Expand source code Browse git
"""
Parser functions for gender*render templates.
"""
import copy
from typing import Tuple, Callable, List, Dict, Union, FrozenSet
from . import errors
from . import handle_context_values
from . import gender_nouns
from . import warnings
from . import global_capitalization_system
# Some helpful type hints:
# A parsed template is a flat list. According to the docstring of `GRParser.parse_gr_template_from_str`, plain-text
# strings and tags alternate in it, and every tag is a list of (section type, list of section values) tuples.
ParsedTemplate = List[Union[str, List[Tuple[str, List[str]]]]]
"""A type hint describing a parsed template as it is returned by most of the methods of GRParser.
Note that not any structure build according to this constructor is valid, since some aspects cannot be described
by Python type hints."""
# Refined variant in which every tag is a dict keyed by section type instead of a list of tuples; this is the
# shape produced by `GRParser.convert_tags_to_indexable_dicts`.
ParsedTemplateRefined = List[Union[str, Dict[str, Union[str, List[str], gender_nouns.GenderedNoun]]]]
"""A type similar to GRParser.ParsedTemplate that makes the sections of tags easier accessible by making them
dicts instead of lists of tuples."""
# definitions of words and word groups accepted by the finite state machine:
class Chars:
    """Classifies characters for the gender*render parser and escapes strings for use in templates."""

    escape_char = "\\"
    whitespace_chars = "\t\n \u200B"
    special_chars = ":*{}\\"
    ws = "whitespace"
    char = "non-special chars"

    @staticmethod
    def type(c: str) -> str:
        """Return the character class of `c` as used by the finite state machine.

        Special characters are their own class (the character itself is returned), whitespace maps to `Chars.ws`
        and everything else maps to `Chars.char`."""
        if c in Chars.special_chars:
            return c
        return Chars.ws if c in Chars.whitespace_chars else Chars.char

    @staticmethod
    def escape_gr_string(s: str, strict: bool = True) -> str:
        """Return `s` with a backslash inserted in front of every character that needs escaping.

        With `strict=True` (the default) all special characters ({, }, \\, : and *) and all whitespace characters
        are escaped, which is what is needed inside of tags. With `strict=False` only {, } and \\ are escaped,
        which is sufficient for text between tags."""
        if strict:
            needs_escaping = Chars.special_chars + Chars.whitespace_chars
        else:
            needs_escaping = {"\\", "{", "}"}
        return "".join(("\\" + ch) if ch in needs_escaping else ch for ch in s)
# definitions of states of the finite state machine:
class States:
    """Names of all states of the finite state machine that describes the template syntax, plus helpers that
    derive the escaped/unescaped variant of a state by appending/removing the `escaped` suffix."""

    # Each value completes the sentence "Currently, the read character is...":
    not_within_tags = "...not part of any tag"
    in_empty_section = "...in a yet empty section"
    in_not_empty_section = "...in a not anymore empty section"
    in_section_with_one_finished_word = "...in a section which already contains a finished word"
    in_empty_value_section = "...in a yet empty value section"
    in_not_empty_value_section = "...in a not empty value section"
    escaped = "...and escaped"

    @staticmethod
    def escape(state: str) -> str:
        """Return the escaped variant of an unescaped state (asserts that `state` is not escaped yet)."""
        assert not States.is_escaped(state)
        return "".join((state, States.escaped))

    @staticmethod
    def unescape(state: str) -> str:
        """Return the unescaped variant of an escaped state (asserts that `state` is escaped)."""
        assert States.is_escaped(state)
        suffix_length = len(States.escaped)
        return state[:-suffix_length]

    @staticmethod
    def is_escaped(state: str) -> bool:
        """Check whether `state` is an escaped state, i.e. whether it carries the `escaped` suffix."""
        return state.endswith(States.escaped)

    @staticmethod
    def switch_escapement(state: str) -> str:
        """Toggle `state` between its escaped and its unescaped variant."""
        if States.is_escaped(state):
            return States.unescape(state)
        return States.escape(state)
# the finite state machine, but without the escaped versions of all states since these are handled separately:
class Transitions:
    """Actions of the finite state machine used by `StateTransitioner`.

    Every action receives the (partially finished) `ParsedTemplate` built so far and the character that was just
    read, and returns the updated template. The actions mutate the template they receive in place and return the
    same object."""

    @staticmethod
    def do_nothing(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Leave the template untouched."""
        return r

    # actions that append a character to something that already exists:

    @staticmethod
    def add_to_text(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Append `c` to the text segment at the end of the template."""
        r[-1] += c
        return r

    @staticmethod
    def add_to_section_type(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Append `c` to the type of the last section of the tag at the end of the template."""
        tag = r[-1]
        section_type, section_values = tag[-1]
        # sections are tuples, so the last section is replaced rather than modified:
        tag[-1] = (section_type + c, section_values)
        return r

    @staticmethod
    def add_to_section_value(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Append `c` to the last value of the last section of the tag at the end of the template."""
        values = r[-1][-1][1]
        values[-1] = values[-1] + c
        return r

    # actions that open something new:

    @staticmethod
    def start_new_tag(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Append a new tag, consisting of one empty section, to the template."""
        r.append([("", [])])
        return r

    @staticmethod
    def start_new_section_and_convert_section_type_to_section_value(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Turn what was collected as the type of the last section into the single value of an un-typed section,
        then open a new empty section behind it."""
        tag = r[-1]
        tag[-1] = ("", [tag[-1][0]])
        tag.append(("", []))
        return r

    @staticmethod
    def start_first_section_value_in_typed_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Begin the first value of an explicitly typed section with the character `c`."""
        r[-1][-1][1].append(c)
        return r

    @staticmethod
    def start_second_section_value_in_typeless_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Re-interpret the presumed type of the last section as the first value of an un-typed section and begin
        a second value with the character `c`."""
        tag = r[-1]
        tag[-1] = ("", [tag[-1][0], c])
        return r

    @staticmethod
    def start_new_section_value_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Open a new empty value in the last section, unless that section already ends with an empty value."""
        values = r[-1][-1][1]
        if values[-1]:
            values.append("")
        return r

    @staticmethod
    def start_new_section_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Drop a trailing empty value from the last section, then open a new empty section in the same tag."""
        values = r[-1][-1][1]
        if not values[-1]:
            values.pop()
        r[-1].append(("", []))
        return r

    # actions that close a tag:

    @staticmethod
    def end_tag_after_typeless_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Close a tag whose last section has no explicit type: the presumed type becomes the section's value,
        and a new empty text segment is started behind the tag."""
        tag = r[-1]
        tag[-1] = ("", [tag[-1][0]])
        r.append("")
        return r

    @staticmethod
    def end_tag_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Close a tag, dropping a trailing empty value from its last section, and start a new text segment."""
        values = r[-1][-1][1]
        if not values[-1]:
            values.pop()
        r.append("")
        return r
class StateTransitioner:
    """Holds the transition table of the finite state machine and looks up transitions in it.

    Escaping is not part of this table; escaped states are handled by the caller."""

    state_transitions: Dict[str, Dict[str, Tuple[str, Callable[[ParsedTemplate, str], ParsedTemplate]]]] = {
        States.not_within_tags: {
            "{": (States.in_empty_section, Transitions.start_new_tag),
            Chars.ws: (States.not_within_tags, Transitions.add_to_text),
            Chars.char: (States.not_within_tags, Transitions.add_to_text),
            ":": (States.not_within_tags, Transitions.add_to_text),
            "*": (States.not_within_tags, Transitions.add_to_text),
        },
        States.in_empty_section: {
            Chars.ws: (States.in_empty_section, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_section, Transitions.add_to_section_type),
        },
        States.in_not_empty_section: {
            ":": (States.in_empty_value_section, Transitions.do_nothing),
            "*": (States.in_empty_section,
                  Transitions.start_new_section_and_convert_section_type_to_section_value),
            "}": (States.not_within_tags, Transitions.end_tag_after_typeless_section),
            Chars.ws: (States.in_section_with_one_finished_word, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_section, Transitions.add_to_section_type),
        },
        States.in_section_with_one_finished_word: {
            ":": (States.in_empty_value_section, Transitions.do_nothing),
            "*": (States.in_empty_section,
                  Transitions.start_new_section_and_convert_section_type_to_section_value),
            "}": (States.not_within_tags, Transitions.end_tag_after_typeless_section),
            Chars.ws: (States.in_section_with_one_finished_word, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_value_section,
                         Transitions.start_second_section_value_in_typeless_section),
        },
        States.in_empty_value_section: {
            Chars.ws: (States.in_empty_value_section, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_value_section,
                         Transitions.start_first_section_value_in_typed_section),
        },
        States.in_not_empty_value_section: {
            "*": (States.in_empty_section,
                  Transitions.start_new_section_and_delete_last_sections_value_if_empty),
            "}": (States.not_within_tags, Transitions.end_tag_and_delete_last_sections_value_if_empty),
            Chars.ws: (States.in_not_empty_value_section,
                       Transitions.start_new_section_value_and_delete_last_sections_value_if_empty),
            Chars.char: (States.in_not_empty_value_section, Transitions.add_to_section_value),
        },
    }
    """A data structure describing the template syntax as a finite state machine in which, for every state s1 and every
    character c, `state_transitions[s1][c]` contains a tuple `(s2, f)` with s2 as the following state and f as the
    function that is applied to the parsed data to include c in it."""

    @staticmethod
    def transition_state(state: str, char: str) -> Tuple[str, Callable[[ParsedTemplate, str], ParsedTemplate]]:
        """Look up the transition for reading `char` while in `state`.

        Returns a tuple of the follow-up state and the `Transitions` function that has to be applied to the parsed
        data to account for `char`.
        Raises `errors.SyntaxError` if the table has no entry for the character's type in the given state."""
        type_of_char = Chars.type(char)
        transitions_of_state = StateTransitioner.state_transitions[state]
        transition = transitions_of_state.get(type_of_char)
        if transition is None:
            # state[3:] strips the leading "..." of the state name so that the sentence reads naturally:
            raise errors.SyntaxError("Parsing error: \"" + type_of_char + "\" may not occur if it is " + state[3:])
        return transition
# define different section types:
class SectionTypes:
    """Table of all section types with their priorities, and helpers to query it and to assign types to the
    un-typed sections of a tag."""

    section_types_w_priorities = [
        ("context", 1000., True),
        ("id", 950., False),
        ("capitalization", 900., False)
    ]
    """All supported section types as a list of tuples in the form of (name, priority, can_have_multiple_values)"""

    @staticmethod
    def section_type_accepts_multiple_values(section_type: str) -> bool:
        """Return whether `section_type` is a known section type that may carry several whitespace-separated
        values. Unknown section types yield False."""
        return any(
            name == section_type and multiple_allowed is True
            for name, _priority, multiple_allowed in SectionTypes.section_types_w_priorities
        )

    @staticmethod
    def section_type_exists(section_type: str) -> bool:
        """Return whether `section_type` is the name of a supported section type."""
        return any(
            name == section_type
            for name, _priority, _multiple_allowed in SectionTypes.section_types_w_priorities
        )

    @staticmethod
    def create_section_types_for_untyped_tag(section_types: List[str]) -> List[str]:
        """Fill in the missing section types of one tag.

        `section_types` lists the section types of a tag in the order the sections appear, with "" standing for a
        section without an explicit type. The returned list has the same length and order, with every "" replaced
        by one of the section types not yet used in the tag.

        Raises `errors.SyntaxPostprocessingError` if the tag has more sections than there are section types, if a
        section type is used twice or does not exist, or if the result contains no "context" section."""
        if len(section_types) > len(SectionTypes.section_types_w_priorities):
            raise errors.SyntaxPostprocessingError("Tag contains more sections than there are section types.")

        # collect and validate the explicitly given section types:
        explicitly_used = set()
        for section_type in section_types:
            if section_type == "":
                continue
            if section_type in explicitly_used:
                raise errors.SyntaxPostprocessingError("Section type \"" + section_type + "\" used twice in a tag.")
            if not SectionTypes.section_type_exists(section_type):
                raise errors.SyntaxPostprocessingError("Section type \"" + section_type + "\" does not exist.")
            explicitly_used.add(section_type)

        # the remaining section types, ordered by ascending priority so that pop() yields the highest priority:
        remaining = sorted(
            (entry for entry in SectionTypes.section_types_w_priorities if entry[0] not in explicitly_used),
            key=lambda entry: entry[1]
        )

        # walk the sections from the right to the left, so the rightmost un-typed section receives the remaining
        # type of highest priority:
        assigned_right_to_left = []
        for section_type in reversed(section_types):
            if section_type == "":
                assigned_right_to_left.append(remaining.pop()[0])
            else:
                assigned_right_to_left.append(section_type)
        result = assigned_right_to_left[::-1]

        if "context" not in result:
            raise errors.SyntaxPostprocessingError("Tag misses a \"context\"-section.")
        return result
# translate the content of gender*render templates into basic parsed lists:
class GRParser:
    """Unites several static methods of a pipeline for parsing gender*render templates from strings into a list format
    and refining this representation to the maximum extent possible without additionally seeing the corresponding
    gender*render pronoun data.

    These functions are written to be executed in the order they are called by `full_parsing_pipeline`, and may or may
    not behave as expected when called on a value that didn't go through the other functions first."""

    @staticmethod
    def parse_gr_template_from_str(template: str) -> ParsedTemplate:
        """Takes a gender*render template as a string and returns it as an easily readable list representation.

        This does only do syntactic parsing in accordance to the defining finite state machine;
        further steps in the parsing pipeline are implemented by other methods of this parser.

        The resulting output is of the following structure:

        * value of a section: represented by lists of strings
        * type of section: represented by a string
        * section: tuple of type representation and value representation
        * tag: list of section representation
        * template: list, where every uneven element represents a tag and every even element is a string

        Special characters are all unescaped in the parsed version of the template.

        Raises `errors.SyntaxError` if a character may not occur at its position, if the template ends directly
        after an escape character, or if the template ends within a tag."""
        result = [""]
        s = States.not_within_tags
        line_no = 1
        char_no = 1

        for c in template:
            # keep track of the position for error reporting:
            # NOTE(review): the counter is advanced before the character is handled, so the first character of a
            # line is reported as column 2 - confirm whether this is intended.
            if c == "\n":
                line_no += 1
                char_no = 1
            else:
                char_no += 1

            type_of_char = Chars.type(c)

            # log:
            warnings.WarningManager.raise_warning(
                "result: " + str(result) + "\n\n"
                + "c: \"" + c + "\"\n"
                + "s: " + s + "\n"
                + "char type: " + type_of_char,
                warnings.GRSyntaxParsingLogging)

            # do the work of the finite state machine:
            if States.is_escaped(s):
                # an escaped character is always treated like an ordinary character, whatever it is:
                s = States.unescape(s)
                s, processing_function = StateTransitioner.state_transitions[s][Chars.char]
                result = processing_function(result, c)
            elif type_of_char == Chars.escape_char:
                # the escape character itself is not stored; it only affects the next character:
                s = States.escape(s)
            else:
                try:
                    s, processing_function = StateTransitioner.transition_state(s, c)
                    result = processing_function(result, c)
                except errors.SyntaxError:
                    # re-raise with position information attached:
                    raise errors.SyntaxError(
                        "The given gender*render template has invalid syntax.",
                        ("unknown file", line_no, char_no, template.split("\n")[line_no - 1])
                    )

        # raise an error if the template ends unproperly:
        if States.is_escaped(s):
            raise errors.SyntaxError("The template ends with an unescaped escape character, please escape it.",
                                     ("unknown file", line_no, char_no, template.split("\n")[-1]))
        elif s != States.not_within_tags:
            raise errors.SyntaxError("A tag opens, but is not finished properly.",
                                     ("unknown file", line_no, char_no, template.split("\n")[-1]))

        return result

    @staticmethod
    def assign_types_to_all_sections(parsed_template: ParsedTemplate) -> ParsedTemplate:
        """Takes a parsed template (as it is created by all methods of GRParser) and assigns every section of undefined
        type a section type.

        Works on a deep copy; the given template is not modified.
        Errors raised by `SectionTypes.create_section_types_for_untyped_tag` are passed on."""
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(result), 2):
            old_section_types: List[str] = [section[0] for section in result[i]]
            new_section_types: List[str] = SectionTypes.create_section_types_for_untyped_tag(old_section_types)
            result[i] = [(new_section_types[s], result[i][s][1]) for s in range(len(new_section_types))]
        return result

    @staticmethod
    def split_tags_with_multiple_context_values(parsed_template: ParsedTemplate) -> ParsedTemplate:
        """Takes a parsed template (as it is created by all methods of GRParser) and splits every tag into a sequence of
        tags, one for every context value of the tag, separated by text segments consisting of a single space.

        This assumes that every section was already assigned a type by GRParser.assign_types_to_all_sections, and may
        lead to wrong results otherwise.
        The context section is left at the end of the tag by this procedure.
        Works on a deep copy; the given template is not modified."""
        result = copy.deepcopy(parsed_template)
        # iterate from the back, so that replacing one tag by several elements does not shift the indices of the
        # tags that are still to be processed:
        for i in reversed(range(1, len(result), 2)):
            tag_without_context_section = [section for section in result[i] if section[0] != "context"]
            tag_but_only_context_section = [section for section in result[i] if section[0] == "context"]

            # split tag into one tag for every context value:
            context_values = tag_but_only_context_section.pop()[1]
            sequence_of_tags = [
                (copy.deepcopy(tag_without_context_section) + [("context", [context_value])])
                for context_value in context_values
            ]
            # put a space between the tags, which also keeps text and tags alternating:
            for j in reversed(range(1, len(sequence_of_tags))):
                sequence_of_tags.insert(j, " ")
            result[i:i+1] = sequence_of_tags
        return result

    @staticmethod
    def make_sure_that_sections_dont_exceed_allowed_amount_of_values(parsed_template: ParsedTemplate) -> ParsedTemplate:
        """Takes a parsed template (as it is created by all methods of GRParser) and raises an error if any section
        that does not allow multiple values has multiple values. This should always be used before calling
        convert_tags_to_indexable_dicts.

        Returns the given template (the same object, unmodified) afterwards.
        Raises `errors.SyntaxPostprocessingError` for the first offending section."""
        for i in range(1, len(parsed_template), 2):  # tags are the elements with uneven index
            for section_type, section_values in parsed_template[i]:
                if len(section_values) > 1 and not SectionTypes.section_type_accepts_multiple_values(section_type):
                    # i is uneven, so floor division yields the 1-based number of the tag as an int; true division
                    # would render it as a float ("Tag no. 1.0"):
                    raise errors.SyntaxPostprocessingError("Tag no. " + str((i + 1) // 2) + " (\""
                                                           + ReGRParser.unparse_gr_tag(parsed_template[i])
                                                           + "\") has multiple values in \""
                                                           + section_type +
                                                           "\"-section even though this type of section does"
                                                           + " not support this.")
        return parsed_template

    @staticmethod
    def convert_tags_to_indexable_dicts(parsed_template: ParsedTemplate) -> ParsedTemplateRefined:
        """Takes a parsed template (as it is created by all methods of GRParser) and converts every tag from a
        representation a la "[(a, b), (c, d)]" to a representation a la "{a: b, c: d}".
        This makes the value of specific types of sections easier to access by other methods.

        Sections that accept only one value, as well as the context section, are mapped to their first value;
        all other sections are mapped to their list of values.

        Note that the result returned by this method is different in that it is not accepted by the other methods of
        GRParser that expect a `ParsedTemplate`, and that this method should thus be used after them.
        Works on a deep copy; the given template is not modified."""
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(result), 2):  # tags are the elements with uneven index
            new_tag = dict()
            for section_type, section_values in result[i]:
                if not SectionTypes.section_type_accepts_multiple_values(section_type) or section_type == "context":
                    new_tag[section_type] = section_values[0]
                else:
                    new_tag[section_type] = section_values
            result[i] = new_tag
        return result

    @staticmethod
    def set_capitalization_value_for_all_tags(parsed_template: ParsedTemplateRefined) -> ParsedTemplateRefined:
        """Takes a parsed template as returned by `GRParser.convert_tags_to_indexable_dicts` and calls
        `global_capitalization_system.assign_and_check_capitalization_value_of_tag` on every tag of a deep copy of
        it, to make sure every tag has a capitalization value. Returns the copy."""
        # ToDo: Test this function!
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(parsed_template), 2):
            # the return value is not used - presumably the tag is modified in place; verify against the callee.
            global_capitalization_system.assign_and_check_capitalization_value_of_tag(result[i])
        return result

    @staticmethod
    def convert_context_values_to_canonicals(parsed_template: ParsedTemplateRefined) -> ParsedTemplateRefined:
        """Converts a parsed template as returned by `GRParser.convert_tags_to_indexable_dicts` to a parsed template
        where every context value is replaced by the result of `ContextValues.get_canonical`.
        Works on a deep copy; the given template is not modified."""
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(parsed_template), 2):
            result[i]["context"] = handle_context_values.ContextValues.get_canonical(result[i]["context"])
        return result

    @staticmethod
    def full_parsing_pipeline(template: str) -> ParsedTemplateRefined:
        """Walks template through the full parsing pipeline defined by `GRParser`, and returns the result."""
        template = GRParser.parse_gr_template_from_str(template)
        template = GRParser.assign_types_to_all_sections(template)
        template = GRParser.split_tags_with_multiple_context_values(template)
        template = GRParser.make_sure_that_sections_dont_exceed_allowed_amount_of_values(template)
        template = GRParser.convert_tags_to_indexable_dicts(template)
        template = GRParser.set_capitalization_value_for_all_tags(template)
        template = GRParser.convert_context_values_to_canonicals(template)
        return template

    @staticmethod
    def get_all_specified_id_values(parsed_template: ParsedTemplateRefined) -> FrozenSet[str]:
        """Returns a frozen set of all id values explicitly specified by tags in the parsed template."""
        return frozenset(
            parsed_template[i]["id"] for i in range(1, len(parsed_template), 2) if "id" in parsed_template[i]
        )

    @staticmethod
    def template_contains_unspecified_ids(parsed_template: ParsedTemplateRefined) -> bool:
        """Returns whether the parsed template contains tags with unspecified id value."""
        return any("id" not in parsed_template[i] for i in range(1, len(parsed_template), 2))
# functions to reverse parsed templates for testing and simplification purposes:
class ReGRParser:
    """Bundles methods that turn a `ParsedTemplate` back into a valid gender*render template string."""

    @staticmethod
    def unparse_gr_tag(tag_representation: List[Tuple[str, List[str]]]) -> str:
        """Return the template source of a single tag.

        Sections are joined by "*" and wrapped in curly brackets; a section with a non-empty type is rendered as
        "type:" followed by its values, the values of a section are separated by spaces, and section types and
        values are escaped strictly."""
        rendered_sections = []
        for section in tag_representation:
            section_type, section_values = section[0], section[1]
            prefix = (Chars.escape_gr_string(section_type) + ":") if section_type else ""
            escaped_values = [Chars.escape_gr_string(value) for value in section_values]
            rendered_sections.append(prefix + " ".join(escaped_values))
        return "{" + "*".join(rendered_sections) + "}"

    @staticmethod
    def unparse_gr_template(parsed_template: ParsedTemplate) -> str:
        """Return a template string that corresponds to the given parsed template.

        Elements with uneven index are rendered as tags, elements with even index as text in which only {, } and
        the backslash are escaped.
        This may be used for testing purposes or to simplify gender*render templates."""
        pieces = []
        for index, element in enumerate(parsed_template):
            if index % 2:  # is a tag
                pieces.append(ReGRParser.unparse_gr_tag(element))
            else:  # is a string
                pieces.append(Chars.escape_gr_string(element, strict=False))
        return "".join(pieces)
# ToDo: This set of methods is currently abandoned; if you want to implement some functions to also convert
# ParsedTemplateRefined to strings, feel free to make a pull request/ issue and maybe we can add an interface for
# it!
Classes
class ParsedTemplate (*args, **kwargs)
-
list() -> new empty list list(iterable) -> new list initialized from iterable's items
Expand source code
class List(list, MutableSequence[T], extra=list): __slots__ = () def __new__(cls, *args, **kwds): if cls._gorg is List: raise TypeError("Type List cannot be instantiated; " "use list() instead") return _generic_new(list, cls, *args, **kwds)
Ancestors
- typing.List
- builtins.list
- typing.MutableSequence
- collections.abc.MutableSequence
- typing.Sequence
- collections.abc.Sequence
- typing.Reversible
- collections.abc.Reversible
- typing.Collection
- collections.abc.Collection
- collections.abc.Sized
- typing.Iterable
- collections.abc.Iterable
- typing.Container
- collections.abc.Container
- typing.Generic
class ParsedTemplateRefined (*args, **kwargs)
-
list() -> new empty list list(iterable) -> new list initialized from iterable's items
Expand source code
class List(list, MutableSequence[T], extra=list): __slots__ = () def __new__(cls, *args, **kwds): if cls._gorg is List: raise TypeError("Type List cannot be instantiated; " "use list() instead") return _generic_new(list, cls, *args, **kwds)
Ancestors
- typing.List
- builtins.list
- typing.MutableSequence
- collections.abc.MutableSequence
- typing.Sequence
- collections.abc.Sequence
- typing.Reversible
- collections.abc.Reversible
- typing.Collection
- collections.abc.Collection
- collections.abc.Sized
- typing.Iterable
- collections.abc.Iterable
- typing.Container
- collections.abc.Container
- typing.Generic
class Chars
-
Helper to categorize characters.
Expand source code Browse git
class Chars:
    """Classifies characters for the gender*render parser and escapes strings for use in templates."""

    escape_char = "\\"
    whitespace_chars = "\t\n \u200B"
    special_chars = ":*{}\\"
    ws = "whitespace"
    char = "non-special chars"

    @staticmethod
    def type(c: str) -> str:
        """Return the character class of `c` as used by the finite state machine.

        Special characters are their own class (the character itself is returned), whitespace maps to `Chars.ws`
        and everything else maps to `Chars.char`."""
        if c in Chars.special_chars:
            return c
        return Chars.ws if c in Chars.whitespace_chars else Chars.char

    @staticmethod
    def escape_gr_string(s: str, strict: bool = True) -> str:
        """Return `s` with a backslash inserted in front of every character that needs escaping.

        With `strict=True` (the default) all special characters ({, }, \\, : and *) and all whitespace characters
        are escaped, which is what is needed inside of tags. With `strict=False` only {, } and \\ are escaped,
        which is sufficient for text between tags."""
        if strict:
            needs_escaping = Chars.special_chars + Chars.whitespace_chars
        else:
            needs_escaping = {"\\", "{", "}"}
        return "".join(("\\" + ch) if ch in needs_escaping else ch for ch in s)
Class variables
var escape_char
var whitespace_chars
var special_chars
var ws
var char
Static methods
def type(c: str) ‑> str
-
Returns the type of character c, which determines how states in the finite state machine that describes gender*render syntax transition to each other. This is either Chars.ws (whitespace), a special character or Chars.char (anything else).
Expand source code Browse git
@staticmethod
def type(c: str) -> str:
    """Return the character class of `c` as used by the finite state machine.

    Special characters are their own class (the character itself is returned), whitespace maps to `Chars.ws`
    and everything else maps to `Chars.char`."""
    if c in Chars.special_chars:
        return c
    return Chars.ws if c in Chars.whitespace_chars else Chars.char
def escape_gr_string(s: str, strict: bool = True) ‑> str
-
Escapes all special gender*render characters in a string, such as {, }, \, : and *, as well as whitespace, with backslashes. If
`strict`
is set to False, only {, } and \ are escaped; this may be used for strings that are supposed to go into gender*render templates, yet not into the inners of the tags themselves. Expand source code Browse git
@staticmethod
def escape_gr_string(s: str, strict: bool = True) -> str:
    """Return `s` with a backslash inserted in front of every character that needs escaping.

    With `strict=True` (the default) all special characters ({, }, \\, : and *) and all whitespace characters
    are escaped, which is what is needed inside of tags. With `strict=False` only {, } and \\ are escaped,
    which is sufficient for text between tags."""
    if strict:
        needs_escaping = Chars.special_chars + Chars.whitespace_chars
    else:
        needs_escaping = {"\\", "{", "}"}
    return "".join(("\\" + ch) if ch in needs_escaping else ch for ch in s)
class States
-
Combines values for all sections the finite state machine that describes the syntax can be in, as well as methods to handle the special escaped/unescaped versions of all states.
Expand source code Browse git
class States:
    """Names of all states of the finite state machine that describes the template syntax, plus helpers that
    derive the escaped/unescaped variant of a state by appending/removing the `escaped` suffix."""

    # Each value completes the sentence "Currently, the read character is...":
    not_within_tags = "...not part of any tag"
    in_empty_section = "...in a yet empty section"
    in_not_empty_section = "...in a not anymore empty section"
    in_section_with_one_finished_word = "...in a section which already contains a finished word"
    in_empty_value_section = "...in a yet empty value section"
    in_not_empty_value_section = "...in a not empty value section"
    escaped = "...and escaped"

    @staticmethod
    def escape(state: str) -> str:
        """Return the escaped variant of an unescaped state (asserts that `state` is not escaped yet)."""
        assert not States.is_escaped(state)
        return "".join((state, States.escaped))

    @staticmethod
    def unescape(state: str) -> str:
        """Return the unescaped variant of an escaped state (asserts that `state` is escaped)."""
        assert States.is_escaped(state)
        suffix_length = len(States.escaped)
        return state[:-suffix_length]

    @staticmethod
    def is_escaped(state: str) -> bool:
        """Check whether `state` is an escaped state, i.e. whether it carries the `escaped` suffix."""
        return state.endswith(States.escaped)

    @staticmethod
    def switch_escapement(state: str) -> str:
        """Toggle `state` between its escaped and its unescaped variant."""
        if States.is_escaped(state):
            return States.unescape(state)
        return States.escape(state)
Class variables
var in_empty_section
var in_not_empty_section
var in_section_with_one_finished_word
var in_empty_value_section
var in_not_empty_value_section
var escaped
Static methods
def escape(state: str) ‑> str
-
Converts an unescaped state to its escaped equivalent.
Expand source code Browse git
@staticmethod
def escape(state: str) -> str:
    """Return the escaped variant of an unescaped state (asserts that `state` is not escaped yet)."""
    assert not States.is_escaped(state)
    return "".join((state, States.escaped))
def unescape(state: str) ‑> str
-
Convert an escaped state to its unescaped equivalent.
Expand source code Browse git
@staticmethod
def unescape(state: str) -> str:
    """Return the unescaped variant of an escaped state (asserts that `state` is escaped)."""
    assert States.is_escaped(state)
    suffix_length = len(States.escaped)
    return state[:-suffix_length]
def is_escaped(state: str) ‑> bool
-
Checks if the current char is an escaped char.
Expand source code Browse git
@staticmethod
def is_escaped(state: str) -> bool:
    """Check whether `state` is an escaped state, i.e. whether it carries the `escaped` suffix."""
    suffix = States.escaped
    return state.endswith(suffix)
def switch_escapement(state: str) ‑> str
-
Returns the escaped or unescaped state of the given state, depending on whether it is currently escaped or not.
Expand source code Browse git
@staticmethod
def switch_escapement(state: str) -> str:
    """Toggle `state` between its escaped and its unescaped variant."""
    if States.is_escaped(state):
        return States.unescape(state)
    return States.escape(state)
class Transitions
-
Functions that modify (partially finished)
`ParsedTemplate`
data structures based on an added character. These functions are used by `StateTransitioner`
for its finite state machine. All of these functions take a (partially finished)
`ParsedTemplate`
and a character and return the resulting (possibly just partially finished) modified `ParsedTemplate`.
They might possibly still perform in-place operations on the `ParsedTemplate`
they receive, though. Expand source code Browse git
class Transitions:
    """Collection of the actions the finite state machine of `StateTransitioner` performs on
    a (partially finished) `ParsedTemplate` whenever it consumes a character.

    Every action receives the template parsed so far and the consumed character and returns
    the resulting (possibly still unfinished) `ParsedTemplate`. The actions work in-place on
    the list they are given and return that same list."""

    @staticmethod
    def do_nothing(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Leaves the template untouched; the character is dropped."""
        return r

    # actions that append a character to something that already exists:

    @staticmethod
    def add_to_text(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Appends `c` to the text segment at the end of the template."""
        r[-1] = r[-1] + c
        return r

    @staticmethod
    def add_to_section_type(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Appends `c` to the type of the last section of the tag at the end of the template."""
        tag = r[-1]
        section_type, section_values = tag[-1]
        # sections are tuples, so the last section is replaced rather than modified:
        tag[-1] = (section_type + c, section_values)
        return r

    @staticmethod
    def add_to_section_value(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Appends `c` to the last value of the last section of the tag at the end of the
        template."""
        values = r[-1][-1][1]
        values[-1] = values[-1] + c
        return r

    # actions that open something new:

    @staticmethod
    def start_new_tag(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Appends a new tag that consists of one section with empty type and no values."""
        empty_tag = [("", [])]
        r.append(empty_tag)
        return r

    @staticmethod
    def start_new_section_and_convert_section_type_to_section_value(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Turns the word collected as type of the last section into the only value of an
        un-typed section, and opens a new empty section behind it."""
        tag = r[-1]
        word = tag.pop()[0]
        tag.extend([("", [word]), ("", [])])
        return r

    @staticmethod
    def start_first_section_value_in_typed_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Opens the first value of a section with explicitly specified type, starting it
        with the character `c`."""
        values = r[-1][-1][1]
        values.append(c)
        return r

    @staticmethod
    def start_second_section_value_in_typeless_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Handles the discovery that the word collected as section type was in fact the first
        value of an un-typed section: the section becomes un-typed, keeps that word as first
        value and gets a second value starting with `c`."""
        tag = r[-1]
        first_value = tag.pop()[0]
        tag.append(("", [first_value, c]))
        return r

    @staticmethod
    def start_new_section_value_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Opens a new empty value in the last section of the last tag, unless that section
        already ends with an empty value."""
        values: List[str] = r[-1][-1][1]
        if values[-1] == "":
            return r
        values.append("")
        return r

    @staticmethod
    def start_new_section_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Removes a trailing empty value from the last section of the last tag and opens a
        new empty section behind it."""
        tag = r[-1]
        values: List[str] = tag[-1][1]
        if values[-1] == "":
            values.pop()
        tag.append(("", []))
        return r

    # actions that close a tag:

    @staticmethod
    def end_tag_after_typeless_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Closes a tag whose last section has no specified type: the collected word becomes
        the only value of an un-typed section, and a new empty text segment is started."""
        tag = r[-1]
        word = tag.pop()[0]
        tag.append(("", [word]))
        r.append("")
        return r

    @staticmethod
    def end_tag_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Closes a tag: removes a trailing empty value from its last section and starts a new
        empty text segment."""
        values: List[str] = r[-1][-1][1]
        if values[-1] == "":
            values.pop()
        r.append("")
        return r
Static methods
def do_nothing(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Does nothing to the template.
Expand source code Browse git
@staticmethod def do_nothing(r: ParsedTemplate, c: str) -> ParsedTemplate: """Does nothing to the template.""" return r
def add_to_text(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Adds a character to the text segment the template ends with.
Expand source code Browse git
@staticmethod def add_to_text(r: ParsedTemplate, c: str) -> ParsedTemplate: """Adds a character to the text segment the template ends with.""" r[-1] += c return r
def add_to_section_type(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Adds a character to the section type of the last section of the tag the template ends with.
Expand source code Browse git
@staticmethod def add_to_section_type(r: ParsedTemplate, c: str) -> ParsedTemplate: """Adds a character to the section type of the last section of the tag the template ends with.""" last_section = r[-1].pop() r[-1].append((last_section[0] + c, last_section[1])) return r
def add_to_section_value(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Adds a character to the last value of the last section of the tag the template ends with.
Expand source code Browse git
@staticmethod def add_to_section_value(r: ParsedTemplate, c: str) -> ParsedTemplate: """Adds a character to the last value of the last section of the tag the template ends with.""" r[-1][-1][1][-1] += c return r
def start_new_tag(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Adds a new empty tag to the end of the template.
Expand source code Browse git
@staticmethod def start_new_tag(r: ParsedTemplate, c: str) -> ParsedTemplate: """Adds a new empty tag to the end of the template.""" r.append([("", [])]) return r
def start_new_section_and_convert_section_type_to_section_value(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Adds a new empty section to the end of the tag the template ends with.
Expand source code Browse git
@staticmethod def start_new_section_and_convert_section_type_to_section_value(r: ParsedTemplate, c: str) -> ParsedTemplate: """Adds a new empty section to the end of the tag the template ends with.""" last_section = r[-1].pop() r[-1].append(("", [last_section[0]])) r[-1].append(("", [])) return r
def start_first_section_value_in_typed_section(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Starts a new section value with its first character `c` in a section with explicitly specified type.
Expand source code Browse git
@staticmethod def start_first_section_value_in_typed_section(r: ParsedTemplate, c: str) -> ParsedTemplate: """Starts a new section value with its first character `c` in a section with explicitely specified type.""" r[-1][-1][1].append(c) return r
def start_second_section_value_in_typeless_section(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Adds a new section value `c` to the end of the tag the template ends with, after discovering that what was thought to be the section type was actually the first value of an un-typed section.
Expand source code Browse git
@staticmethod def start_second_section_value_in_typeless_section(r: ParsedTemplate, c: str) -> ParsedTemplate: """Adds a new section value `c` to the end of the tag the template ends with, after discovering that what was thought to be the section type was actually the first value of an un-typed section.""" last_section = r[-1].pop() r[-1].append(("", [last_section[0], c])) return r
def start_new_section_value_and_delete_last_sections_value_if_empty(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Adds a new empty value to the last section of the tag the template ends with, if there is no empty value at its end yet.
Expand source code Browse git
@staticmethod def start_new_section_value_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate: """Adds a new empty value to the last section of the tag the template ends with, if there is no empty value at its end yet.""" values_of_last_section: List[str] = r[-1][-1][1] if values_of_last_section[-1] != "": values_of_last_section.append("") return r
def start_new_section_and_delete_last_sections_value_if_empty(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Adds a new section to the tag the template ends with, and deletes the last value of the last section if it is empty.
Expand source code Browse git
@staticmethod def start_new_section_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate: """Adds a new section to the tag the template ends with, and deletes the last value of the last section if it is empty.""" values_of_last_section: List[str] = r[-1][-1][1] if values_of_last_section[-1] == "": del values_of_last_section[-1] r[-1].append(("", [])) return r
def end_tag_after_typeless_section(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Ends a tag (and adds a new text segment behind it) which ends with a section without a specified type.
Expand source code Browse git
@staticmethod def end_tag_after_typeless_section(r: ParsedTemplate, c: str) -> ParsedTemplate: """Ends a tag (and adds a new text segment behind it) which ends with a section without a specified type.""" last_section = r[-1].pop() r[-1].append(("", [last_section[0]])) r.append("") return r
def end_tag_and_delete_last_sections_value_if_empty(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Ends a tag and deletes its last section's last value if it is empty.
Expand source code Browse git
@staticmethod def end_tag_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate: """Ends a tag and deletes its last section's last value if it is empty.""" values_of_last_section: List[str] = r[-1][-1][1] if values_of_last_section[-1] == "": del values_of_last_section[-1] r.append("") return r
class StateTransitioner
-
Translates between states using a finite state machine. This does not take into account the ability to escape characters.
Expand source code Browse git
class StateTransitioner:
    """Finite state machine of the template syntax: maps a state and a character to the
    following state and the action to perform. Escaping of characters is not handled here."""

    state_transitions: Dict[str, Dict[str, Tuple[str, Callable[[ParsedTemplate, str], ParsedTemplate]]]] = {
        States.not_within_tags: {
            "{": (States.in_empty_section, Transitions.start_new_tag),
            Chars.ws: (States.not_within_tags, Transitions.add_to_text),
            Chars.char: (States.not_within_tags, Transitions.add_to_text),
            ":": (States.not_within_tags, Transitions.add_to_text),
            "*": (States.not_within_tags, Transitions.add_to_text),
        },
        States.in_empty_section: {
            Chars.ws: (States.in_empty_section, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_section, Transitions.add_to_section_type),
        },
        States.in_not_empty_section: {
            ":": (States.in_empty_value_section, Transitions.do_nothing),
            "*": (States.in_empty_section,
                  Transitions.start_new_section_and_convert_section_type_to_section_value),
            "}": (States.not_within_tags, Transitions.end_tag_after_typeless_section),
            Chars.ws: (States.in_section_with_one_finished_word, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_section, Transitions.add_to_section_type),
        },
        States.in_section_with_one_finished_word: {
            ":": (States.in_empty_value_section, Transitions.do_nothing),
            "*": (States.in_empty_section,
                  Transitions.start_new_section_and_convert_section_type_to_section_value),
            "}": (States.not_within_tags, Transitions.end_tag_after_typeless_section),
            Chars.ws: (States.in_section_with_one_finished_word, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_value_section,
                         Transitions.start_second_section_value_in_typeless_section),
        },
        States.in_empty_value_section: {
            Chars.ws: (States.in_empty_value_section, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_value_section,
                         Transitions.start_first_section_value_in_typed_section),
        },
        States.in_not_empty_value_section: {
            "*": (States.in_empty_section,
                  Transitions.start_new_section_and_delete_last_sections_value_if_empty),
            "}": (States.not_within_tags,
                  Transitions.end_tag_and_delete_last_sections_value_if_empty),
            Chars.ws: (States.in_not_empty_value_section,
                       Transitions.start_new_section_value_and_delete_last_sections_value_if_empty),
            Chars.char: (States.in_not_empty_value_section, Transitions.add_to_section_value),
        },
    }
    """The template syntax as a finite state machine: for every state s1 and every character type c
    that is allowed in s1, `state_transitions[s1][c]` is a tuple `(s2, f)` with s2 being the following
    state and f being the function that is applied to the parsed data to include the character in it.
    Character types that are missing for a state are not allowed in that state."""

    @staticmethod
    def transition_state(state: str, char: str) -> Tuple[str, Callable[[ParsedTemplate, str], ParsedTemplate]]:
        """Looks up what happens when the character `char` is read while in state `state`.

        Returns a tuple of the following state and the function that takes the already-parsed data
        and the character and returns the data extended by that character.
        Raises `errors.SyntaxError` if this type of character is not allowed in the given state."""
        kind = Chars.type(char)
        allowed = StateTransitioner.state_transitions[state]
        if kind not in allowed:
            # state names start with "...", which is cut off for the message
            raise errors.SyntaxError(f'Parsing error: "{kind}" may not occur if it is {state[3:]}')
        return allowed[kind]
Class variables
var state_transitions : Dict[str, Dict[str, Tuple[str, Callable[[List[Union[str, List[Tuple[str, List[str]]]]], str], List[Union[str, List[Tuple[str, List[str]]]]]]]]]
-
A data structure describing the template syntax as a finite state machine in which, for every state s1 and every character c, `state_transitions[s1][c]` contains a tuple `(s2, f)` with s2 as the following state and f as the function that is applied to the parsed data to include c in it.
Static methods
def transition_state(state: str, char: str) ‑> Tuple[str, Callable[[List[Union[str, List[Tuple[str, List[str]]]]], str], List[Union[str, List[Tuple[str, List[str]]]]]]]
-
For a given state s and a given character c, returns the next state s2 and a function that takes a list representation of the already-parsed data and c and returns a modified, extended duplicate of the data based on c.
Expand source code Browse git
@staticmethod
def transition_state(state: str, char: str) -> Tuple[str, Callable[[ParsedTemplate, str], ParsedTemplate]]:
    """Looks up what happens when the character `char` is read while in state `state`.

    Returns a tuple of the following state and the function that takes the already-parsed data
    and the character and returns the data extended by that character.
    Raises `errors.SyntaxError` if this type of character is not allowed in the given state."""
    kind = Chars.type(char)
    allowed = StateTransitioner.state_transitions[state]
    if kind not in allowed:
        # state names start with "...", which is cut off for the message
        raise errors.SyntaxError(f'Parsing error: "{kind}" may not occur if it is {state[3:]}')
    return allowed[kind]
class SectionTypes
-
Capsules a mapping of priorities to section types and methods to assign section types to un-typed sections.
Expand source code Browse git
class SectionTypes:
    """Holds the known section types together with their priorities, and the logic that gives
    un-typed sections of a tag a section type."""

    section_types_w_priorities = [
        ("context", 1000., True),
        ("id", 950., False),
        ("capitalization", 900., False)
    ]
    """All supported section types as a list of tuples in the form of
    (name, priority, can_have_multiple_values)"""

    @staticmethod
    def section_type_accepts_multiple_values(section_type: str) -> bool:
        """Tells whether sections of the given type may hold several whitespace-separated values.
        Unknown section types yield False."""
        return SectionTypes.section_type_exists(section_type) and any(
            name == section_type and multiple is True
            for name, _, multiple in SectionTypes.section_types_w_priorities
        )

    @staticmethod
    def section_type_exists(section_type: str) -> bool:
        """Tells whether a section type of the given name is known."""
        return any(entry[0] == section_type for entry in SectionTypes.section_types_w_priorities)

    @staticmethod
    def create_section_types_for_untyped_tag(section_types: List[str]) -> List[str]:
        """Fills in the missing section types of one tag.

        `section_types` lists the section types of the tag in the order the sections appear in,
        with "" standing for a section without explicit type. Explicit types are kept; the
        un-typed sections receive the not yet used section types, where the rightmost un-typed
        section gets the one with the highest priority, the next one to its left the second
        highest, and so on. Returns the resulting list of section types.

        Raises `errors.SyntaxPostprocessingError` if the tag has more sections than there are
        section types, if a type is used twice or does not exist, or if the result contains
        no "context" section."""
        known_types = SectionTypes.section_types_w_priorities
        if len(section_types) > len(known_types):
            raise errors.SyntaxPostprocessingError("Tag contains more sections than there are section types.")

        # collect and validate the explicitly given section types:
        explicit = set()
        for name in section_types:
            if name == "":
                continue
            if name in explicit:
                raise errors.SyntaxPostprocessingError("Section type \"" + name + "\" used twice in a tag.")
            if not SectionTypes.section_type_exists(name):
                raise errors.SyntaxPostprocessingError("Section type \"" + name + "\" does not exist.")
            explicit.add(name)

        # the unused types, ordered by ascending priority so that pop() yields the highest one:
        unused = sorted(
            (entry for entry in known_types if entry[0] not in explicit),
            key=lambda entry: entry[1]
        )

        # walk over the sections from the right to the left and fill the gaps:
        assigned = [unused.pop()[0] if name == "" else name for name in reversed(section_types)]
        assigned.reverse()

        if "context" not in assigned:
            raise errors.SyntaxPostprocessingError("Tag misses a \"context\"-section.")
        return assigned
Class variables
var section_types_w_priorities
-
All supported section types as a list of tuples in the form of (name, priority, can_have_multiple_values)
Static methods
def section_type_accepts_multiple_values(section_type: str) ‑> bool
-
Checks whether a section type can have multiple whitespace-separated values.
Expand source code Browse git
@staticmethod
def section_type_accepts_multiple_values(section_type: str) -> bool:
    """Tells whether sections of the given type may hold several whitespace-separated values.
    Unknown section types yield False."""
    return SectionTypes.section_type_exists(section_type) and any(
        name == section_type and multiple is True
        for name, _, multiple in SectionTypes.section_types_w_priorities
    )
def section_type_exists(section_type: str) ‑> bool
-
Checks if a section type exists.
Expand source code Browse git
@staticmethod
def section_type_exists(section_type: str) -> bool:
    """Tells whether a section type of the given name is known."""
    return any(entry[0] == section_type for entry in SectionTypes.section_types_w_priorities)
def create_section_types_for_untyped_tag(section_types: List[str]) ‑> List[str]
-
Receives a list of section types in a tag (in chronological order) and assigns section types to those section without a section type, in accordance with the priorities of section types and the specification. Returns the typed section type list. Raises errors if section matching can not be done, or if no context section could be found.
Expand source code Browse git
@staticmethod
def create_section_types_for_untyped_tag(section_types: List[str]) -> List[str]:
    """Fills in the missing section types of one tag.

    `section_types` lists the section types of the tag in the order the sections appear in,
    with "" standing for a section without explicit type. Explicit types are kept; the
    un-typed sections receive the not yet used section types, where the rightmost un-typed
    section gets the one with the highest priority, the next one to its left the second
    highest, and so on. Returns the resulting list of section types.

    Raises `errors.SyntaxPostprocessingError` if the tag has more sections than there are
    section types, if a type is used twice or does not exist, or if the result contains
    no "context" section."""
    known_types = SectionTypes.section_types_w_priorities
    if len(section_types) > len(known_types):
        raise errors.SyntaxPostprocessingError("Tag contains more sections than there are section types.")

    # collect and validate the explicitly given section types:
    explicit = set()
    for name in section_types:
        if name == "":
            continue
        if name in explicit:
            raise errors.SyntaxPostprocessingError("Section type \"" + name + "\" used twice in a tag.")
        if not SectionTypes.section_type_exists(name):
            raise errors.SyntaxPostprocessingError("Section type \"" + name + "\" does not exist.")
        explicit.add(name)

    # the unused types, ordered by ascending priority so that pop() yields the highest one:
    unused = sorted(
        (entry for entry in known_types if entry[0] not in explicit),
        key=lambda entry: entry[1]
    )

    # walk over the sections from the right to the left and fill the gaps:
    assigned = [unused.pop()[0] if name == "" else name for name in reversed(section_types)]
    assigned.reverse()

    if "context" not in assigned:
        raise errors.SyntaxPostprocessingError("Tag misses a \"context\"-section.")
    return assigned
class GRParser
-
Unites several static methods of a pipeline for parsing gender*render templates from strings into a list format and refining this representation to the maximum extent possible without additionally seeing the corresponding gender*render pronoun data.
These functions are written to be executed in the order they are called by `full_parsing_pipeline`, and may or may not behave as expected when called on a value that didn't go through the other functions first.
Expand source code Browse git
class GRParser:
    """Unites several static methods of a pipeline for parsing gender*render templates from strings into a
    list format and refining this representation to the maximum extent possible without additionally seeing
    the corresponding gender*render pronoun data.

    These functions are written to be executed in the order they are called by `full_parsing_pipeline`, and
    may or may not behave as expected when called on a value that didn't go through the other functions
    first."""

    @staticmethod
    def parse_gr_template_from_str(template: str) -> ParsedTemplate:
        """Takes a gender*render template as a string and returns it as an easily readable list
        representation. This does only do syntactic parsing in accordance to the defining finite state
        machine; further steps in the parsing pipeline are implemented by other methods of this parser.

        The resulting output is of the following structure:

        * value of a section: represented by lists of strings
        * type of section: represented by a string
        * section: tuple of type representation and value representation
        * tag: list of section representation
        * template: list, where every uneven element represents a tag and every even element is a string

        Special characters are all unescaped in the parsed version of the template.

        Raises `errors.SyntaxError` if a character occurs where the finite state machine does not allow
        it, if the template ends directly after an escape character, or if it ends within a tag."""
        result = [""]
        s = States.not_within_tags
        line_no = 1
        char_no = 1

        for c in template:
            # keep track of the position for SyntaxError raising:
            if c == "\n":
                line_no += 1
                char_no = 1
            else:
                char_no += 1

            type_of_char = Chars.type(c)

            # log:
            warnings.WarningManager.raise_warning(
                "result: " + str(result) + "\n\n"
                + "c: \"" + c + "\"\n"
                + "s: " + s + "\n"
                + "char type: " + type_of_char,
                warnings.GRSyntaxParsingLogging)

            # do the work of the finite state machine:
            if States.is_escaped(s):
                # an escaped character is treated like an ordinary character, whatever it is:
                s = States.unescape(s)
                s, processing_function = StateTransitioner.state_transitions[s][Chars.char]
                result = processing_function(result, c)
            elif type_of_char == Chars.escape_char:
                s = States.escape(s)
            else:
                try:
                    s, processing_function = StateTransitioner.transition_state(s, c)
                    result = processing_function(result, c)
                except errors.SyntaxError:
                    raise errors.SyntaxError(
                        "The given gender*render template has invalid syntax.",
                        ("unknown file", line_no, char_no, template.split("\n")[line_no - 1])
                    )

        # raise an error if the template ends unproperly:
        if States.is_escaped(s):
            raise errors.SyntaxError("The template ends with an unescaped escape character, please escape it.",
                                     ("unknown file", line_no, char_no, template.split("\n")[-1]))
        elif s != States.not_within_tags:
            raise errors.SyntaxError("A tag opens, but is not finished properly.",
                                     ("unknown file", line_no, char_no, template.split("\n")[-1]))

        return result

    @staticmethod
    def assign_types_to_all_sections(parsed_template: ParsedTemplate) -> ParsedTemplate:
        """Takes a parsed template (as it is created by all methods of GRParser) and assigns every section
        of undefined type a section type. Works on a deep copy; the given template is not modified."""
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(result), 2):
            old_section_types: List[str] = [section[0] for section in result[i]]
            new_section_types: List[str] = SectionTypes.create_section_types_for_untyped_tag(old_section_types)
            result[i] = [(new_type, section[1]) for new_type, section in zip(new_section_types, result[i])]
        return result

    @staticmethod
    def split_tags_with_multiple_context_values(parsed_template: ParsedTemplate) -> ParsedTemplate:
        """Takes a parsed template (as it is created by all methods of GRParser) and splits every tag into
        a sequence of tags, one for every context value of the tag, separated by " " text segments.
        This assumes that every section was already assigned a type by GRParser.assign_types_to_all_sections,
        and may lead to wrong results otherwise.

        The context section is placed at the end of every resulting tag by this procedure.
        Works on a deep copy; the given template is not modified."""
        result = copy.deepcopy(parsed_template)

        # go through the tags from the back so that inserting elements does not shift unvisited tags:
        for i in reversed(range(1, len(result), 2)):
            tag_without_context_section = [section for section in result[i] if section[0] != "context"]
            tag_but_only_context_section = [section for section in result[i] if section[0] == "context"]

            # split tag into one tag for every context value:
            context_values = tag_but_only_context_section.pop()[1]
            sequence_of_tags = [
                (copy.deepcopy(tag_without_context_section) + [("context", [context_value])])
                for context_value in context_values
            ]
            # put a text segment between every two tags to keep the alternation of text and tags:
            for j in reversed(range(1, len(sequence_of_tags))):
                sequence_of_tags.insert(j, " ")
            result[i:i+1] = sequence_of_tags

        return result

    @staticmethod
    def make_sure_that_sections_dont_exceed_allowed_amount_of_values(parsed_template: ParsedTemplate) \
            -> ParsedTemplate:
        """Takes a parsed template (as it is created by all methods of GRParser) and raises an
        `errors.SyntaxPostprocessingError` if any section that does not allow multiple values has multiple
        values. This should always be used before calling convert_tags_to_indexable_dicts.

        Returns the given template unchanged afterwards."""
        for i in range(1, len(parsed_template), 2):  # every uneven element is a tag
            for section_type, section_values in parsed_template[i]:
                if SectionTypes.section_type_accepts_multiple_values(section_type):
                    continue
                if len(section_values) > 1:
                    # floor division keeps the tag number an integer ("1" rather than "1.0"):
                    raise errors.SyntaxPostprocessingError("Tag no. " + str((i + 1) // 2) + " (\""
                                                           + ReGRParser.unparse_gr_tag(parsed_template[i])
                                                           + "\") has multiple values in \"" + section_type
                                                           + "\"-section even though this type of section does"
                                                           + " not support this.")
        return parsed_template

    @staticmethod
    def convert_tags_to_indexable_dicts(parsed_template: ParsedTemplate) -> ParsedTemplateRefined:
        """Takes a parsed template (as it is created by all methods of GRParser) and converts every tag from
        a representation a la "[(a, b), (c, d)]" to a representation a la "{a: b, c: d}".
        This makes the value of specific types of sections easier to access by other methods.

        Sections that accept only one value, as well as the context section, are mapped to their first
        value; all other sections are mapped to their list of values.

        Note that the result returned by this method is different in that it is not accepted by the other
        methods of GRParser, and that this method should thus be the last method in this pipeline to be
        used."""
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(result), 2):  # every uneven element is a tag
            new_tag = dict()
            for section_type, section_values in result[i]:
                if not SectionTypes.section_type_accepts_multiple_values(section_type) \
                        or section_type == "context":
                    new_tag[section_type] = section_values[0]
                else:
                    new_tag[section_type] = section_values
            result[i] = new_tag
        return result

    @staticmethod
    def set_capitalization_value_for_all_tags(parsed_template: ParsedTemplateRefined) -> ParsedTemplateRefined:
        """Takes a parsed template as returned by `GRParser.convert_tags_to_indexable_dicts` and passes
        every tag of a deep copy of it to
        `global_capitalization_system.assign_and_check_capitalization_value_of_tag`, to make sure every tag
        has a capitalization value."""
        # ToDo: Test this function!
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(parsed_template), 2):
            global_capitalization_system.assign_and_check_capitalization_value_of_tag(result[i])
        return result

    @staticmethod
    def convert_context_values_to_canonicals(parsed_template: ParsedTemplateRefined) -> ParsedTemplateRefined:
        """Converts a parsed template as returned by `GRParser.convert_tags_to_indexable_dicts` to a parsed
        template where every context value is replaced by the result of
        `handle_context_values.ContextValues.get_canonical`. Works on a deep copy."""
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(parsed_template), 2):
            result[i]["context"] = handle_context_values.ContextValues.get_canonical(result[i]["context"])
        return result

    @staticmethod
    def full_parsing_pipeline(template: str) -> ParsedTemplateRefined:
        """Walks template through the full parsing pipeline defined by `GRParser`, and returns the result."""
        template = GRParser.parse_gr_template_from_str(template)
        template = GRParser.assign_types_to_all_sections(template)
        template = GRParser.split_tags_with_multiple_context_values(template)
        template = GRParser.make_sure_that_sections_dont_exceed_allowed_amount_of_values(template)
        template = GRParser.convert_tags_to_indexable_dicts(template)
        template = GRParser.set_capitalization_value_for_all_tags(template)
        template = GRParser.convert_context_values_to_canonicals(template)
        return template

    @staticmethod
    def get_all_specified_id_values(parsed_template: ParsedTemplateRefined) -> FrozenSet[str]:
        """Returns a frozen set of all id values explicitly specified by tags in the parsed template."""
        return frozenset(
            parsed_template[i]["id"] for i in range(1, len(parsed_template), 2) if "id" in parsed_template[i]
        )

    @staticmethod
    def template_contains_unspecified_ids(parsed_template: ParsedTemplateRefined) -> bool:
        """Returns whether the parsed template contains tags with unspecified id value."""
        return any("id" not in parsed_template[i] for i in range(1, len(parsed_template), 2))
Static methods
def parse_gr_template_from_str(template: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Takes a gender*render template as a string and returns it as an easily readable list representation. This does only do syntactic parsing in accordance to the defining finite state machine; further steps in the parsing pipeline are implemented by other methods of this parser.
The resulting output is of the following structure: * value of a section: represented by lists of strings * type of section: represented by a string * section: tuple of type representation and value representation * tag: list of section representation * template: list, where every uneven element represents a tag and every even element is a string
Special characters are all unescaped in the parsed version of the template.
Expand source code Browse git
@staticmethod
def parse_gr_template_from_str(template: str) -> ParsedTemplate:
    """Parses a gender*render template string into its nested list representation.

    Only the syntactic step is done here, by driving the finite state machine
    character by character; later pipeline steps are implemented by other
    methods of this parser.

    Structure of the returned value:
    * template: list whose even indices hold plain strings and whose odd
      indices hold tags
    * tag: list of sections
    * section: tuple of (section type, section values)
    * section type: a string
    * section values: a list of strings

    Special characters are all unescaped in the parsed version of the template.

    Raises errors.SyntaxError if a state transition is rejected, if the template
    ends directly after an escape character, or if the template ends while a
    tag is still open."""
    parsed = [""]
    state = States.not_within_tags
    line_no = 1
    char_no = 1

    for c in template:
        # keep track of the position for SyntaxError reporting:
        # NOTE(review): char_no starts at 1 and is incremented before the character
        # is processed, so the first character of a line is reported as 2 - confirm
        # that this is the intended column convention.
        if c == "\n":
            line_no += 1
            char_no = 1
        else:
            char_no += 1

        # log:
        warnings.WarningManager.raise_warning(
            "result: " + str(parsed) + "\n\n"
            + "c: \"" + c + "\"\n"
            + "s: " + state + "\n"
            + "char type: " + Chars.type(c),
            warnings.GRSyntaxParsingLogging)

        type_of_char = Chars.type(c)

        # an escaped character is always handled like a non-special character:
        if States.is_escaped(state):
            state = States.unescape(state)
            state, handler = StateTransitioner.state_transitions[state][Chars.char]
            parsed = handler(parsed, c)
            continue

        # an escape character only switches the machine into the escaped state:
        if type_of_char == Chars.escape_char:
            state = States.escape(state)
            continue

        # everything else is a regular transition of the finite state machine:
        try:
            state, handler = StateTransitioner.transition_state(state, c)
            parsed = handler(parsed, c)
        except errors.SyntaxError:
            raise errors.SyntaxError(
                "The given gender*render template has invalid syntax.",
                ("unknown file", line_no, char_no, template.split("\n")[line_no - 1])
            )

    # the template must neither end in an escaped state nor inside of a tag:
    if States.is_escaped(state):
        raise errors.SyntaxError("The template ends with an unescaped escape character, please escape it.",
                                 ("unknown file", line_no, char_no, template.split("\n")[-1]))
    if state != States.not_within_tags:
        raise errors.SyntaxError("A tag opens, but is not finished properly.",
                                 ("unknown file", line_no, char_no, template.split("\n")[-1]))

    return parsed
def assign_types_to_all_sections(parsed_template: List[Union[str, List[Tuple[str, List[str]]]]]) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Takes a parsed template (as it is created by all methods of GRParser) and assigns every section of undefined type a section type.
Expand source code Browse git
@staticmethod
def assign_types_to_all_sections(parsed_template: ParsedTemplate) -> ParsedTemplate:
    """Returns a copy of the parsed template in which the section types of every tag are replaced
    by the types computed by `SectionTypes.create_section_types_for_untyped_tag`.

    The given template is not modified; the section values are carried over unchanged."""
    typed_template = copy.deepcopy(parsed_template)

    for tag_index in range(1, len(typed_template), 2):  # tags are located at the odd indices
        tag = typed_template[tag_index]
        declared_types: List[str] = [section[0] for section in tag]
        resolved_types: List[str] = SectionTypes.create_section_types_for_untyped_tag(declared_types)
        # pair every resolved type with the values of the section at the same position:
        typed_template[tag_index] = [
            (resolved_type, tag[position][1])
            for position, resolved_type in enumerate(resolved_types)
        ]

    return typed_template
-
Takes a parsed template (as it is created by all methods of GRParser) and splits every tag into a sequence of tags, one for every context value of the tag. This assumes that every section was already assigned a type by GRParser.assign_types_to_all_sections, and may lead to wrong results otherwise. The context section is left at the end of the tag by this procedure.
Expand source code Browse git
@staticmethod def split_tags_with_multiple_context_values(parsed_template: ParsedTemplate) -> ParsedTemplate: """Takes a parsed template (as it is created by all methods of GRParser) and splits every tag into a sequence of tags, one for every context value of the tag. This assumes that every section was already assigned a type by GRParser.assign_types_to_all_sections, and may lead to wrong results otherwise. The context section is left the end of the tag by this procedure.""" result = copy.deepcopy(parsed_template) for i in reversed(range(1, len(result), 2)): tag_without_context_section = [section for section in result[i] if section[0] != "context"] tag_but_only_context_section = [section for section in result[i] if section[0] == "context"] # split tag into one tag for every context value: context_values = tag_but_only_context_section.pop()[1] sequence_of_tags = [ (copy.deepcopy(tag_without_context_section) + [("context", [context_value])]) for context_value in context_values ] for j in reversed(range(1, len(sequence_of_tags))): sequence_of_tags.insert(j, " ") result[i:i+1] = sequence_of_tags return result
def make_sure_that_sections_dont_exceed_allowed_amount_of_values(parsed_template: List[Union[str, List[Tuple[str, List[str]]]]]) ‑> List[Union[str, List[Tuple[str, List[str]]]]]
-
Takes a parsed template (as it is created by all methods of GRParser) and raises an error if any section that does not allow multiple values has multiple values. This should always be used before calling convert_tags_to_indexable_dicts. Returns the given parsed template afterwards.
Expand source code Browse git
@staticmethod
def make_sure_that_sections_dont_exceed_allowed_amount_of_values(parsed_template: ParsedTemplate) -> ParsedTemplate:
    """Takes a parsed template (as it is created by all methods of GRParser) and raises an error if any
    section that does not allow multiple values has multiple values.
    This should always be used before calling convert_tags_to_indexable_dicts.

    Returns the given parsed template itself (not a copy) if no section violates this rule.

    Raises errors.SyntaxPostprocessingError for the first section that has more than one value even
    though `SectionTypes.section_type_accepts_multiple_values` is false for its type."""
    for i in range(1, len(parsed_template), 2):  # tags are located at the odd indices
        tag = parsed_template[i]
        for section_type, section_values in tag:
            if SectionTypes.section_type_accepts_multiple_values(section_type):
                continue
            if len(section_values) > 1:
                # floor division keeps the 1-based tag number an int ("Tag no. 1", not "Tag no. 1.0"):
                tag_number = (i + 1) // 2
                raise errors.SyntaxPostprocessingError("Tag no. " + str(tag_number) + " (\""
                                                       + ReGRParser.unparse_gr_tag(tag)
                                                       + "\") has multiple values in \"" + section_type
                                                       + "\"-section even though this type of section does"
                                                       + " not support this.")
    return parsed_template
-
Takes a parsed template (as it is created by all methods of GRParser) and converts every tag from a representation a la "[(a, b), (c, d)]" to a representation a la "{a: b, c: d}". This makes the value of specific types of sections easier to access by other methods. Note that the result returned by this method is different in that it is not accepted by the other methods of GRParser, and that this method should thus be the last method in this pipeline to be used. Raises an error if a section has multiple values yet accepts only one.
Expand source code Browse git
@staticmethod
def convert_tags_to_indexable_dicts(parsed_template: ParsedTemplate) -> ParsedTemplateRefined:
    """Returns a copy of the parsed template in which every tag is a dict that maps section types to
    section values, i.e. "[(a, b), (c, d)]" becomes "{a: b, c: d}".

    Sections whose type accepts multiple values keep their list of values. All other sections, as
    well as the "context" section, are reduced to their first value. This method does not check by
    itself whether such a section has more than one value.

    The result has a different structure than the one the other parsing methods of GRParser work
    on, so this conversion belongs after them in the pipeline."""
    converted = copy.deepcopy(parsed_template)

    for tag_index in range(1, len(converted), 2):  # tags are located at the odd indices
        tag_as_dict = {}
        for section_type, section_values in converted[tag_index]:
            keeps_value_list = (SectionTypes.section_type_accepts_multiple_values(section_type)
                                and section_type != "context")
            if keeps_value_list:
                tag_as_dict[section_type] = section_values
            else:
                tag_as_dict[section_type] = section_values[0]
        converted[tag_index] = tag_as_dict

    return converted
-
Takes a parsed template as returned by
GRParser.convert_tags_to_indexable_dicts()
and makes sure every tag has a capitalization value.Expand source code Browse git
@staticmethod
def set_capitalization_value_for_all_tags(parsed_template: ParsedTemplateRefined) -> ParsedTemplateRefined:
    """Takes a parsed template as returned by `GRParser.convert_tags_to_indexable_dicts`, copies it and
    hands every tag of the copy to
    `global_capitalization_system.assign_and_check_capitalization_value_of_tag`.

    The given template is left untouched; the copy is returned."""
    # ToDo: Test this function!
    capitalized = copy.deepcopy(parsed_template)
    for tag in capitalized[1::2]:  # tags are located at the odd indices
        global_capitalization_system.assign_and_check_capitalization_value_of_tag(tag)
    return capitalized
def convert_context_values_to_canonicals(parsed_template: List[Union[str, Dict[str, Union[str, List[str], GenderedNoun]]]]) ‑> List[Union[str, Dict[str, Union[str, List[str], GenderedNoun]]]]
-
Converts a parsed template as returned by
GRParser.convert_tags_to_indexable_dicts()
to a parsed template where every context value is canonical.Expand source code Browse git
@staticmethod
def convert_context_values_to_canonicals(parsed_template: ParsedTemplateRefined) -> ParsedTemplateRefined:
    """Takes a parsed template as returned by `GRParser.convert_tags_to_indexable_dicts` and returns a
    copy in which the "context" value of every tag was replaced by the result of
    `handle_context_values.ContextValues.get_canonical` for it."""
    canonical_template = copy.deepcopy(parsed_template)
    for tag in canonical_template[1::2]:  # tags are located at the odd indices
        tag["context"] = handle_context_values.ContextValues.get_canonical(tag["context"])
    return canonical_template
def full_parsing_pipeline(template: str) ‑> List[Union[str, Dict[str, Union[str, List[str], GenderedNoun]]]]
-
Walks template through the full parsing pipeline defined by
GRParser
, and returns the result.Expand source code Browse git
@staticmethod
def full_parsing_pipeline(template: str) -> ParsedTemplateRefined:
    """Runs a template string through all parsing steps of `GRParser` in their intended order and
    returns the final result."""
    # syntactic parsing and normalization of the list-based representation:
    syntactically_parsed = GRParser.parse_gr_template_from_str(template)
    typed = GRParser.assign_types_to_all_sections(syntactically_parsed)
    split_by_context = GRParser.split_tags_with_multiple_context_values(typed)
    validated = GRParser.make_sure_that_sections_dont_exceed_allowed_amount_of_values(split_by_context)

    # conversion to the dict-based representation and refinement of it:
    as_dicts = GRParser.convert_tags_to_indexable_dicts(validated)
    capitalized = GRParser.set_capitalization_value_for_all_tags(as_dicts)
    return GRParser.convert_context_values_to_canonicals(capitalized)
def get_all_specified_id_values(parsed_template: List[Union[str, Dict[str, Union[str, List[str], GenderedNoun]]]]) ‑> FrozenSet[str]
-
Returns a frozen set of all id values explicitly specified by tags in the parsed template.
Expand source code Browse git
@staticmethod def get_all_specified_id_values(parsed_template: ParsedTemplateRefined) -> FrozenSet[str]: """Returns a frozen set of all id values explicitly specified by tags in the parsed template.""" return frozenset( parsed_template[i]["id"] for i in range(1, len(parsed_template), 2) if "id" in parsed_template[i] )
def template_contains_unspecified_ids(parsed_template: List[Union[str, Dict[str, Union[str, List[str], GenderedNoun]]]]) ‑> bool
-
Returns whether the parsed template contains tags with unspecified id value.
Expand source code Browse git
@staticmethod def template_contains_unspecified_ids(parsed_template: ParsedTemplateRefined) -> bool: """Returns whether the parsed template contains tags with unspecified id value.""" return bool(list( parsed_template[i] for i in range(1, len(parsed_template), 2) if "id" not in parsed_template[i] ))
class ReGRParser
-
Bundles methods to get a valid gender*render template from ParsedTemplate.
Expand source code Browse git
class ReGRParser:
    """Bundles methods to get a valid gender*render template from ParsedTemplate."""

    @staticmethod
    def unparse_gr_tag(tag_representation: List[Tuple[str, List[str]]]) -> str:
        """Takes the list representation of a single tag and returns the tag as a string.

        Sections are joined by "*" and the values of a section by spaces; a section type is only
        written (followed by ":") if it is not empty. Types and values are escaped with
        `Chars.escape_gr_string`."""
        rendered_sections = []
        for section in tag_representation:
            section_type, section_values = section[0], section[1]
            type_prefix = (Chars.escape_gr_string(section_type) + ":") if section_type else ""
            rendered_values = " ".join([Chars.escape_gr_string(value) for value in section_values])
            rendered_sections.append(type_prefix + rendered_values)
        return "{" + "*".join(rendered_sections) + "}"

    @staticmethod
    def unparse_gr_template(parsed_template: ParsedTemplate) -> str:
        """Takes a parsed template in its list-based representation and returns a template (as a
        string) that corresponds to it.
        This may be used for testing purposes or to simplify gender*render templates."""
        pieces = []
        for index, element in enumerate(parsed_template):
            if index % 2:  # is a tag
                pieces.append(ReGRParser.unparse_gr_tag(element))
            else:  # is a string
                pieces.append(Chars.escape_gr_string(element, strict=False))
        return "".join(pieces)
Static methods
def unparse_gr_tag(tag_representation: List[Tuple[str, List[str]]]) ‑> str
-
Expand source code Browse git
@staticmethod
def unparse_gr_tag(tag_representation: List[Tuple[str, List[str]]]) -> str:
    """Takes the list representation of a single tag and returns the tag as a string.

    Sections are joined by "*" and the values of a section by spaces; a section type is only
    written (followed by ":") if it is not empty. Types and values are escaped with
    `Chars.escape_gr_string`."""
    def render_section(section) -> str:
        # an empty section type is omitted together with its colon
        type_prefix = (Chars.escape_gr_string(section[0]) + ":") if section[0] else ""
        return type_prefix + " ".join([Chars.escape_gr_string(value) for value in section[1]])

    return "{" + "*".join([render_section(section) for section in tag_representation]) + "}"
def unparse_gr_template(parsed_template: List[Union[str, List[Tuple[str, List[str]]]]]) ‑> str
-
Takes the result of any method of the GRParser class and returns a template (as a string) that corresponds to the given parsed template. This may be used for testing purposes or to simplify gender*render templates.
Expand source code Browse git
@staticmethod
def unparse_gr_template(parsed_template: ParsedTemplate) -> str:
    """Takes a parsed template in its list-based representation and returns a template (as a string)
    that corresponds to it.
    This may be used for testing purposes or to simplify gender*render templates."""
    pieces = []
    for index, element in enumerate(parsed_template):
        if index % 2:  # is a tag
            pieces.append(ReGRParser.unparse_gr_tag(element))
        else:  # is a string
            pieces.append(Chars.escape_gr_string(element, strict=False))
    return "".join(pieces)