Module `gender_render.parse_templates`

Parser functions for gender*render templates.

Expand source code Browse git

"""
Parser functions for gender*render templates.
"""

import copy
from typing import Tuple, Callable, List, Dict, Union, FrozenSet

from . import errors
from . import handle_context_values
from . import gender_nouns
from . import warnings
from . import global_capitalization_system

# Some helpful type hints:


# A template after syntactic parsing: a list in which text segments (str) and tags alternate.
# A tag is a list of sections; a section is a tuple of (section type, list of section values).
ParsedTemplate = List[Union[str, List[Tuple[str, List[str]]]]]
"""A type hint describing a parsed template as it is returned by most of the methods of GRParser.
Note that not any structure build according to this constructor is valid, since some aspects cannot be described
by Python type hints."""

# Same alternating layout as ParsedTemplate, but every tag is a dict that maps a section type to its value(s).
ParsedTemplateRefined = List[Union[str, Dict[str, Union[str, List[str], gender_nouns.GenderedNoun]]]]
"""A type similar to GRParser.ParsedTemplate that makes the sections of tags easier accessible by making them
dicts instead of lists of tuples."""

# definitions of words and word groups accepted by the finite state machine:


class Chars:
    """Character classification and escaping helpers for gender*render syntax."""
    escape_char = "\\"
    whitespace_chars = "\t\n \u200B"
    special_chars = ":*{}\\"
    ws = "whitespace"
    char = "non-special chars"

    @staticmethod
    def type(c: str) -> str:
        """Classifies the character c for the finite state machine that describes gender*render syntax.
        Special characters are their own type and are returned unchanged; whitespace yields Chars.ws and every
        other character yields Chars.char."""
        if c in Chars.special_chars:
            return c
        return Chars.ws if c in Chars.whitespace_chars else Chars.char

    @staticmethod
    def escape_gr_string(s: str, strict: bool = True) -> str:
        """Returns a copy of s in which every character that needs escaping is preceded by a backslash.
        With `strict` set to True (the default), these are all special characters ({, }, \\, : and *) and all
        whitespace characters, which is what is needed for text placed inside of a tag.
        With `strict` set to False, only {, } and \\ are escaped, which suffices for text placed between tags."""
        if strict:
            needs_escaping = Chars.special_chars + Chars.whitespace_chars
        else:
            needs_escaping = {"\\", "{", "}"}
        return "".join(("\\" + symbol) if symbol in needs_escaping else symbol for symbol in s)

# definitions of states of the finite state machine:


class States:
    """Names of all states of the finite state machine that describes the template syntax, plus helpers that
    convert between the plain and the escaped variant of a state."""

    # Every value completes the sentence "Currently, the read character is..."
    not_within_tags = "...not part of any tag"

    in_empty_section = "...in a yet empty section"
    in_not_empty_section = "...in a not anymore empty section"
    in_section_with_one_finished_word = "...in a section which already contains a finished word"

    in_empty_value_section = "...in a yet empty value section"
    in_not_empty_value_section = "...in a not empty value section"

    # suffix that marks the escaped variant of a state:
    escaped = "...and escaped"

    @staticmethod
    def is_escaped(state: str) -> bool:
        """Tells whether the given state is the escaped variant of a state."""
        return state.endswith(States.escaped)

    @staticmethod
    def escape(state: str) -> str:
        """Returns the escaped variant of an unescaped state."""
        assert not States.is_escaped(state)
        return state + States.escaped

    @staticmethod
    def unescape(state: str) -> str:
        """Returns the unescaped variant of an escaped state."""
        assert States.is_escaped(state)
        return state[:-len(States.escaped)]

    @staticmethod
    def switch_escapement(state: str) -> str:
        """Toggles a state between its escaped and its unescaped variant."""
        if States.is_escaped(state):
            return States.unescape(state)
        return States.escape(state)

# the finite state machine, but without the escaped versions of all states since these are handled separately:


class Transitions:
    """Actions of the finite state machine used by `StateTransitioner`.

    Every action receives a (partially finished) `ParsedTemplate` and the character that was just read, and returns
    the (possibly still partially finished) `ParsedTemplate` that results from taking the character into account.
    The actions work in-place on the `ParsedTemplate` they are given and return that same object."""

    @staticmethod
    def do_nothing(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Leaves the template untouched."""
        return r

    # actions that extend something that already exists:

    @staticmethod
    def add_to_text(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Appends the character to the text segment at the end of the template."""
        r[-1] += c
        return r

    @staticmethod
    def add_to_section_type(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Appends the character to the type of the last section of the tag at the end of the template."""
        tag = r[-1]
        # sections are tuples, so the last section is replaced rather than modified:
        section_type, section_values = tag.pop()
        tag.append((section_type + c, section_values))
        return r

    @staticmethod
    def add_to_section_value(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Appends the character to the last value of the last section of the tag at the end of the template."""
        section_values = r[-1][-1][1]
        section_values[-1] += c
        return r

    # actions that open something new:

    @staticmethod
    def start_new_tag(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Appends a new tag, consisting of one empty and yet untyped section, to the template."""
        fresh_tag = [("", [])]
        r.append(fresh_tag)
        return r

    @staticmethod
    def start_new_section_and_convert_section_type_to_section_value(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Turns what was collected as the type of the last section into the only value of an untyped section, and
        opens a new empty section behind it."""
        tag = r[-1]
        supposed_type = tag.pop()[0]
        tag.extend([("", [supposed_type]), ("", [])])
        return r

    @staticmethod
    def start_first_section_value_in_typed_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Starts the first value of a section with explicitly specified type, using `c` as its first character."""
        section_values = r[-1][-1][1]
        section_values.append(c)
        return r

    @staticmethod
    def start_second_section_value_in_typeless_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Turns what was collected as the type of the last section into the first value of an untyped section, and
        starts a second value with `c` as its first character."""
        tag = r[-1]
        supposed_type = tag.pop()[0]
        tag.append(("", [supposed_type, c]))
        return r

    @staticmethod
    def start_new_section_value_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Opens a new empty value in the last section of the tag at the end of the template, unless that section
        already ends with an empty value."""
        section_values: List[str] = r[-1][-1][1]
        if section_values[-1]:
            section_values.append("")
        return r

    @staticmethod
    def start_new_section_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Removes the last value of the last section if it is empty, and then opens a new empty section in the tag
        at the end of the template."""
        tag = r[-1]
        section_values: List[str] = tag[-1][1]
        if not section_values[-1]:
            section_values.pop()
        tag.append(("", []))
        return r

    # actions that close a tag:

    @staticmethod
    def end_tag_after_typeless_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Closes a tag whose last section has no specified type: what was collected as its type becomes its only
        value, and a new empty text segment is opened behind the tag."""
        tag = r[-1]
        supposed_type = tag.pop()[0]
        tag.append(("", [supposed_type]))
        r.append("")
        return r

    @staticmethod
    def end_tag_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Closes a tag: removes the last value of its last section if it is empty, and opens a new empty text
        segment behind the tag."""
        section_values: List[str] = r[-1][-1][1]
        if not section_values[-1]:
            section_values.pop()
        r.append("")
        return r


class StateTransitioner:
    """The finite state machine that describes the template syntax, without any notion of escaped characters
    (those are handled separately by the parser)."""

    state_transitions: Dict[str, Dict[str, Tuple[str, Callable[[ParsedTemplate, str], ParsedTemplate]]]] = {
        # text outside of tags:
        States.not_within_tags: {
            "{": (States.in_empty_section, Transitions.start_new_tag),
            Chars.ws: (States.not_within_tags, Transitions.add_to_text),
            Chars.char: (States.not_within_tags, Transitions.add_to_text),
            ":": (States.not_within_tags, Transitions.add_to_text),
            "*": (States.not_within_tags, Transitions.add_to_text),
        },
        # a section was opened, but nothing except whitespace was read yet:
        States.in_empty_section: {
            Chars.ws: (States.in_empty_section, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_section, Transitions.add_to_section_type),
        },
        # the first word of a section is being read; it is collected as the section type:
        States.in_not_empty_section: {
            ":": (States.in_empty_value_section, Transitions.do_nothing),
            "*": (States.in_empty_section, Transitions.start_new_section_and_convert_section_type_to_section_value),
            "}": (States.not_within_tags, Transitions.end_tag_after_typeless_section),
            Chars.ws: (States.in_section_with_one_finished_word, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_section, Transitions.add_to_section_type),
        },
        # the first word of a section is complete, whitespace followed it:
        States.in_section_with_one_finished_word: {
            ":": (States.in_empty_value_section, Transitions.do_nothing),
            "*": (States.in_empty_section, Transitions.start_new_section_and_convert_section_type_to_section_value),
            "}": (States.not_within_tags, Transitions.end_tag_after_typeless_section),
            Chars.ws: (States.in_section_with_one_finished_word, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_value_section, Transitions.start_second_section_value_in_typeless_section),
        },
        # a section type was terminated by ":", but no value was read yet:
        States.in_empty_value_section: {
            Chars.ws: (States.in_empty_value_section, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_value_section, Transitions.start_first_section_value_in_typed_section),
        },
        # at least one value of the section was started:
        States.in_not_empty_value_section: {
            "*": (
                States.in_empty_section,
                Transitions.start_new_section_and_delete_last_sections_value_if_empty),
            "}": (
                States.not_within_tags,
                Transitions.end_tag_and_delete_last_sections_value_if_empty),
            Chars.ws: (
                States.in_not_empty_value_section,
                Transitions.start_new_section_value_and_delete_last_sections_value_if_empty),
            Chars.char: (States.in_not_empty_value_section, Transitions.add_to_section_value),
        },
    }
    """A data structure describing the template syntax as a finite state machine in which, for every state s1 and every
    character c, `state_transitions[s1][c]` contains a tuple `(s2, f)` with s2 as the following state and f as the
    function that is applied to the parsed data to include c in it."""

    @staticmethod
    def transition_state(state: str, char: str) -> Tuple[str, Callable[[ParsedTemplate, str], ParsedTemplate]]:
        """Looks up what happens when the character `char` is read in the state `state`.
        Returns a tuple of the following state and the function that has to be applied to the already-parsed data
        and the character to take the character into account.
        Raises `errors.SyntaxError` if the state has no transition for this type of character."""
        type_of_char = Chars.type(char)
        transition = StateTransitioner.state_transitions[state].get(type_of_char)
        if transition is None:
            # state names start with "...", which is cut off for the error message:
            raise errors.SyntaxError("Parsing error: \"" + type_of_char + "\" may not occur if it is " + state[3:])
        return transition

# define different section types:


class SectionTypes:
    """Holds the supported section types together with their priorities, and assigns section types to sections
    that were written without one."""

    section_types_w_priorities = [
        ("context", 1000., True),
        ("id", 950., False),
        ("capitalization", 900., False)
    ]
    """All supported section types as a list of tuples in the form of (name, priority, can_have_multiple_values)"""

    @staticmethod
    def section_type_accepts_multiple_values(section_type: str) -> bool:
        """Tells whether the section type exists and may hold several whitespace-separated values."""
        return any(
            name == section_type and multiple_values is True
            for name, _, multiple_values in SectionTypes.section_types_w_priorities
        )

    @staticmethod
    def section_type_exists(section_type: str) -> bool:
        """Tells whether a section type of this name is supported."""
        return any(name == section_type for name, _, _ in SectionTypes.section_types_w_priorities)

    @staticmethod
    def create_section_types_for_untyped_tag(section_types: List[str]) -> List[str]:
        """Receives the section types of one tag in the order in which the sections occur, with "" standing for a
        section without explicit type, and returns the list with every "" replaced by a concrete section type.
        The untyped sections are served from the right to the left, each one receiving the section type of highest
        priority that is not used in the tag yet.
        Raises `errors.SyntaxPostprocessingError` if the tag has more sections than there are section types, if a
        section type is used twice or does not exist, or if the tag ends up without a context section."""
        if len(section_types) > len(SectionTypes.section_types_w_priorities):
            raise errors.SyntaxPostprocessingError("Tag contains more sections than there are section types.")

        # collect and validate all section types that were given explicitly:
        explicit_types = set()
        for section_type in section_types:
            if section_type == "":
                continue
            if section_type in explicit_types:
                raise errors.SyntaxPostprocessingError("Section type \"" + section_type + "\" used twice in a tag.")
            if not SectionTypes.section_type_exists(section_type):
                raise errors.SyntaxPostprocessingError("Section type \"" + section_type + "\" does not exist.")
            explicit_types.add(section_type)

        # the remaining section types, ordered so that the one of highest priority sits at the end:
        unused_types = sorted(
            (entry for entry in SectionTypes.section_types_w_priorities if entry[0] not in explicit_types),
            key=lambda entry: entry[1]
        )

        # walk through the sections from the right to the left and fill in the gaps:
        typed_from_the_right = []
        for section_type in reversed(section_types):
            typed_from_the_right.append(section_type if section_type != "" else unused_types.pop()[0])
        typed = typed_from_the_right[::-1]

        # every tag needs a context section:
        if "context" not in typed:
            raise errors.SyntaxPostprocessingError("Tag misses a \"context\"-section.")

        return typed


# translate the content of gender*render templates into basic parsed lists:


class GRParser:
    """Unites several static methods of a pipeline for parsing gender*render templates from strings into a list format
    and refining this representation to the maximum extent possible without additionally seeing the corresponding
    gender*render pronoun data.

    These functions are written to be executed in the order they are called by `full_parsing_pipeline`, and may or may
    not behave as expected when called on a value that didn't go through the other functions first."""

    @staticmethod
    def parse_gr_template_from_str(template: str) -> ParsedTemplate:
        """Takes a gender*render template as a string and returns it as an easily readable list representation.
        This does only do syntactic parsing in accordance to the defining finite state machine;
        further steps in the parsing pipeline are implemented by other methods of this parser.

        The resulting output is of the following structure:
        * value of a section: represented by lists of strings
        * type of section: represented by a string
        * section: tuple of type representation and value representation
        * tag: list of section representation
        * template: list, where every uneven element represents a tag and every even element is a string

        Special characters are all unescaped in the parsed version of the template.

        Raises `errors.SyntaxError` if a character occurs where the syntax does not allow it, if the template ends
        within a tag, or if it ends directly after an escape character."""

        result = [""]
        s = States.not_within_tags
        line_no = 1
        char_no = 1
        # iterate over all characters:
        for c in template:
            # keep track of the position for the raising of SyntaxErrors:
            # NOTE(review): the counter starts at 1 and is incremented before the character is processed, so an
            #  error raised for the first character of a line reports column 2 - confirm whether this is intended.
            if c == "\n":
                line_no += 1
                char_no = 1
            else:
                char_no += 1

            type_of_char = Chars.type(c)

            # log:
            warnings.WarningManager.raise_warning(
                "result: " + str(result) + "\n\n"
                + "c: \"" + c + "\"\n"
                + "s: " + s + "\n"
                + "char type: " + type_of_char,
                warnings.GRSyntaxParsingLogging)

            # do the work of the finite state machine:
            if States.is_escaped(s):
                # an escaped character is always treated like a non-special character:
                s, processing_function = StateTransitioner.state_transitions[States.unescape(s)][Chars.char]
                result = processing_function(result, c)
            elif type_of_char == Chars.escape_char:
                s = States.escape(s)
            else:
                try:
                    s, processing_function = StateTransitioner.transition_state(s, c)
                    result = processing_function(result, c)
                except errors.SyntaxError as error:
                    # re-raise with information on where in the template the error occurred:
                    raise errors.SyntaxError(
                        "The given gender*render template has invalid syntax.",
                        ("unknown file", line_no, char_no, template.split("\n")[line_no - 1])
                    ) from error

        # raise an error if the template does not end properly:
        if States.is_escaped(s):
            raise errors.SyntaxError("The template ends with an unescaped escape character, please escape it.",
                                     ("unknown file", line_no, char_no, template.split("\n")[-1]))
        elif s != States.not_within_tags:
            raise errors.SyntaxError("A tag opens, but is not finished properly.",
                                     ("unknown file", line_no, char_no, template.split("\n")[-1]))

        return result

    @staticmethod
    def assign_types_to_all_sections(parsed_template: ParsedTemplate) -> ParsedTemplate:
        """Takes a parsed template (as it is created by all methods of GRParser) and assigns every section of undefined
        type a section type.
        Works on a deep copy; the given template is not modified.
        Errors raised by `SectionTypes.create_section_types_for_untyped_tag` are passed on."""
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(result), 2):  # tags are located at the uneven indices
            tag = result[i]
            new_section_types: List[str] = SectionTypes.create_section_types_for_untyped_tag(
                [section[0] for section in tag]
            )
            result[i] = [(new_type, section[1]) for new_type, section in zip(new_section_types, tag)]
        return result

    @staticmethod
    def split_tags_with_multiple_context_values(parsed_template: ParsedTemplate) -> ParsedTemplate:
        """Takes a parsed template (as it is created by all methods of GRParser) and splits every tag into a sequence of
        tags, one for every context value of the tag, separated by text segments consisting of one space.
        This assumes that every section was already assigned a type by GRParser.assign_types_to_all_sections, and may
        lead to wrong results otherwise.
        The context section is left at the end of the tag by this procedure.
        Works on a deep copy; the given template is not modified."""
        result = copy.deepcopy(parsed_template)
        # go through the tags from the back so that insertions do not shift the indices of unprocessed tags:
        for i in reversed(range(1, len(result), 2)):
            tag_without_context_section = [section for section in result[i] if section[0] != "context"]
            context_values = [section for section in result[i] if section[0] == "context"].pop()[1]

            # build one tag for every context value, with " " as text between two neighbouring tags:
            sequence_of_tags = []
            for context_value in context_values:
                if sequence_of_tags:
                    sequence_of_tags.append(" ")
                sequence_of_tags.append(
                    copy.deepcopy(tag_without_context_section) + [("context", [context_value])]
                )
            result[i:i+1] = sequence_of_tags

        return result

    @staticmethod
    def make_sure_that_sections_dont_exceed_allowed_amount_of_values(parsed_template: ParsedTemplate) -> ParsedTemplate:
        """Takes a parsed template (as it is created by all methods of GRParser) and raises an
        `errors.SyntaxPostprocessingError` if any section whose type does not allow multiple values has multiple
        values. This should always be used before calling convert_tags_to_indexable_dicts.
        Returns the given template, unmodified, afterwards."""
        for i in range(1, len(parsed_template), 2):  # tags are located at the uneven indices
            for section_type, section_values in parsed_template[i]:
                if len(section_values) > 1 and not SectionTypes.section_type_accepts_multiple_values(section_type):
                    # floor division keeps the tag number an integer ("Tag no. 1" rather than "Tag no. 1.0"):
                    raise errors.SyntaxPostprocessingError("Tag no. " + str((i + 1) // 2) + " (\""
                                                           + ReGRParser.unparse_gr_tag(parsed_template[i])
                                                           + "\") has multiple values in \""
                                                           + section_type +
                                                           "\"-section even though this type of section does"
                                                           + " not support this.")
        return parsed_template

    @staticmethod
    def convert_tags_to_indexable_dicts(parsed_template: ParsedTemplate) -> ParsedTemplateRefined:
        """Takes a parsed template (as it is created by all methods of GRParser) and converts every tag from a
        representation a la "[(a, b), (c, d)]" to a representation a la "{a: b, c: d}".
        This makes the value of specific types of sections easier to access by other methods.
        Note that the result returned by this method is different in that it is not accepted by the other methods of
        GRParser, and that this method should thus be the last method in this pipeline to be used.

        Sections of a type that accepts only one value, as well as context sections, are reduced to their first
        value; this method does not check for surplus values itself, which is why
        make_sure_that_sections_dont_exceed_allowed_amount_of_values and split_tags_with_multiple_context_values
        should be used first.
        Works on a deep copy; the given template is not modified."""
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(result), 2):  # tags are located at the uneven indices
            new_tag = dict()
            for section_type, section_values in result[i]:
                if not SectionTypes.section_type_accepts_multiple_values(section_type) or section_type == "context":
                    new_tag[section_type] = section_values[0]
                else:
                    new_tag[section_type] = section_values
            result[i] = new_tag

        return result

    @staticmethod
    def set_capitalization_value_for_all_tags(parsed_template: ParsedTemplateRefined) -> ParsedTemplateRefined:
        """Takes a parsed template as returned by `GRParser.convert_tags_to_indexable_dicts` and makes sure every
        tag has a capitalization value, by handing every tag to
        `global_capitalization_system.assign_and_check_capitalization_value_of_tag`.
        Works on a deep copy; the given template is not modified."""
        # ToDo: Test this function!
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(result), 2):
            global_capitalization_system.assign_and_check_capitalization_value_of_tag(result[i])
        return result

    @staticmethod
    def convert_context_values_to_canonicals(parsed_template: ParsedTemplateRefined) -> ParsedTemplateRefined:
        """Converts a parsed template as returned by `GRParser.convert_tags_to_indexable_dicts` to a parsed template
        where every context value is canonical.
        Works on a deep copy; the given template is not modified."""
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(result), 2):
            result[i]["context"] = handle_context_values.ContextValues.get_canonical(result[i]["context"])
        return result

    @staticmethod
    def full_parsing_pipeline(template: str) -> ParsedTemplateRefined:
        """Walks template through the full parsing pipeline defined by `GRParser`, and returns the result."""
        template = GRParser.parse_gr_template_from_str(template)
        template = GRParser.assign_types_to_all_sections(template)
        template = GRParser.split_tags_with_multiple_context_values(template)
        template = GRParser.make_sure_that_sections_dont_exceed_allowed_amount_of_values(template)
        template = GRParser.convert_tags_to_indexable_dicts(template)
        template = GRParser.set_capitalization_value_for_all_tags(template)
        template = GRParser.convert_context_values_to_canonicals(template)
        return template

    @staticmethod
    def get_all_specified_id_values(parsed_template: ParsedTemplateRefined) -> FrozenSet[str]:
        """Returns a frozen set of all id values explicitly specified by tags in the parsed template."""
        return frozenset(
            parsed_template[i]["id"] for i in range(1, len(parsed_template), 2) if "id" in parsed_template[i]
        )

    @staticmethod
    def template_contains_unspecified_ids(parsed_template: ParsedTemplateRefined) -> bool:
        """Returns whether the parsed template contains tags with unspecified id value."""
        return any("id" not in parsed_template[i] for i in range(1, len(parsed_template), 2))

# functions to reverse parsed templates for testing and simplification purposes:


class ReGRParser:
    """Bundles methods that turn a ParsedTemplate back into a valid gender*render template."""

    @staticmethod
    def unparse_gr_tag(tag_representation: List[Tuple[str, List[str]]]) -> str:
        """Takes a tag as a list of (section type, section values) tuples and returns it as a string in template
        syntax: the sections are joined with "*", the values of a section are joined with spaces, and a non-empty
        section type is put in front of its values, followed by ":".
        All section types and values are strictly escaped."""
        sections_as_strings = []
        for section_type, section_values in tag_representation:
            type_prefix = (Chars.escape_gr_string(section_type) + ":") if section_type else ""
            escaped_values = [Chars.escape_gr_string(value) for value in section_values]
            sections_as_strings.append(type_prefix + " ".join(escaped_values))
        return "{" + "*".join(sections_as_strings) + "}"

    @staticmethod
    def unparse_gr_template(parsed_template: ParsedTemplate) -> str:
        """Takes a parsed template in which every tag is a list of (section type, section values) tuples, and
        returns a template (as a string) that corresponds to it.
        This may be used for testing purposes or to simplify gender*render templates."""
        return "".join(
            # tags are located at the uneven indices, text segments at the even ones:
            ReGRParser.unparse_gr_tag(element) if index % 2 else Chars.escape_gr_string(element, strict=False)
            for index, element in enumerate(parsed_template)
        )

    # ToDo: This set of methods is currently abandoned; if you want to implement some functions to also convert
    #  ParsedTemplateRefined to strings, feel free to make a pull request/ issue and maybe we can add an interface for
    #  it!

Classes

class ParsedTemplate (*args, **kwargs)

list() -> new empty list list(iterable) -> new list initialized from iterable's items

Expand source code

# Generic alias class for the built-in list, as defined by the typing module (shown here because
# ParsedTemplate is an alias built from typing.List).
class List(list, MutableSequence[T], extra=list):

    # no per-instance attributes are declared
    __slots__ = ()

    def __new__(cls, *args, **kwds):
        # The List class itself refuses instantiation and points the caller to list() instead.
        # NOTE(review): `_gorg` is presumably the unsubscripted origin of a generic class - defined outside this view.
        if cls._gorg is List:
            raise TypeError("Type List cannot be instantiated; "
                            "use list() instead")
        # every other case is delegated to _generic_new (defined outside this view) with list as first argument
        return _generic_new(list, cls, *args, **kwds)

Ancestors

typing.List
builtins.list
typing.MutableSequence
collections.abc.MutableSequence
typing.Sequence
collections.abc.Sequence
typing.Reversible
collections.abc.Reversible
typing.Collection
collections.abc.Collection
collections.abc.Sized
typing.Iterable
collections.abc.Iterable
typing.Container
collections.abc.Container
typing.Generic

class ParsedTemplateRefined (*args, **kwargs)

list() -> new empty list list(iterable) -> new list initialized from iterable's items

Expand source code

# Generic alias class for the built-in list, as defined by the typing module (shown here because
# ParsedTemplateRefined is an alias built from typing.List).
class List(list, MutableSequence[T], extra=list):

    # no per-instance attributes are declared
    __slots__ = ()

    def __new__(cls, *args, **kwds):
        # The List class itself refuses instantiation and points the caller to list() instead.
        # NOTE(review): `_gorg` is presumably the unsubscripted origin of a generic class - defined outside this view.
        if cls._gorg is List:
            raise TypeError("Type List cannot be instantiated; "
                            "use list() instead")
        # every other case is delegated to _generic_new (defined outside this view) with list as first argument
        return _generic_new(list, cls, *args, **kwds)

Ancestors

typing.List
builtins.list
typing.MutableSequence
collections.abc.MutableSequence
typing.Sequence
collections.abc.Sequence
typing.Reversible
collections.abc.Reversible
typing.Collection
collections.abc.Collection
collections.abc.Sized
typing.Iterable
collections.abc.Iterable
typing.Container
collections.abc.Container
typing.Generic

class Chars

Helper to categorize characters.

Expand source code Browse git

class Chars:
    """Character classification and escaping helpers for gender*render syntax."""
    escape_char = "\\"
    whitespace_chars = "\t\n \u200B"
    special_chars = ":*{}\\"
    ws = "whitespace"
    char = "non-special chars"

    @staticmethod
    def type(c: str) -> str:
        """Classifies the character c for the finite state machine that describes gender*render syntax.
        Special characters are their own type and are returned unchanged; whitespace yields Chars.ws and every
        other character yields Chars.char."""
        if c in Chars.special_chars:
            return c
        return Chars.ws if c in Chars.whitespace_chars else Chars.char

    @staticmethod
    def escape_gr_string(s: str, strict: bool = True) -> str:
        """Returns a copy of s in which every character that needs escaping is preceded by a backslash.
        With `strict` set to True (the default), these are all special characters ({, }, \\, : and *) and all
        whitespace characters, which is what is needed for text placed inside of a tag.
        With `strict` set to False, only {, } and \\ are escaped, which suffices for text placed between tags."""
        if strict:
            needs_escaping = Chars.special_chars + Chars.whitespace_chars
        else:
            needs_escaping = {"\\", "{", "}"}
        return "".join(("\\" + symbol) if symbol in needs_escaping else symbol for symbol in s)

Class variables

var escape_char
var whitespace_chars
var special_chars
var ws
var char

Static methods

def type(c: str) ‑> str

Returns the type of character c, which determines how states in the finite state machine that describes gender*render syntax transition to each other. This is either Chars.ws (whitespace), a special character or Chars.char (anything else).

Expand source code Browse git

@staticmethod
def type(c: str) -> str:
    """Classifies the character c for the finite state machine that describes gender*render syntax.
    Special characters are their own type and are returned unchanged; whitespace yields Chars.ws and every
    other character yields Chars.char."""
    if c in Chars.special_chars:
        return c
    return Chars.ws if c in Chars.whitespace_chars else Chars.char

def escape_gr_string(s: str, strict: bool = True) ‑> str

Escapes all special gender*render characters in a string, such as {, }, \, : and *, as well as whitespace, with backslashes. If strict is set to False, only {, } and \ are escaped; this may be used for strings that are supposed to go into gender*render templates, yet not into the inners of the tags themselves.

Expand source code Browse git

@staticmethod
def escape_gr_string(s: str, strict: bool = True) -> str:
    """Escapes all special gender*render characters in a string, such as {, }, \\, : and *, as well as whitespace,
    with backslashes.
    If `strict` is set to False, only {, } and \\ are escaped; this may be used for strings that are supposed to go
    into gender*render templates, yet not into the inners of the tags themselves.

    Returns the escaped copy of `s`; an empty string is returned unchanged."""
    # build the set of characters to escape once instead of once per character:
    to_escape = set(Chars.special_chars + Chars.whitespace_chars) if strict else {"\\", "{", "}"}
    # a single pass with join avoids re-slicing the whole string for every escaped character:
    return "".join("\\" + c if c in to_escape else c for c in s)

class States

Combines values for all sections the finite state machine that describes the syntax can be in, as well as methods to handle the special escaped/unescaped versions of all states.

Expand source code Browse git

class States:
    """Collects the names of all states the finite state machine describing the syntax can be in, together with
    helpers that convert between the unescaped and the escaped variant of a state.

    The escaped variant of a state is the state's name with `States.escaped` appended to it."""

    # Currently, the read character is...
    not_within_tags = "...not part of any tag"

    in_empty_section = "...in a yet empty section"
    in_not_empty_section = "...in a not anymore empty section"
    in_section_with_one_finished_word = "...in a section which already contains a finished word"

    in_empty_value_section = "...in a yet empty value section"
    in_not_empty_value_section = "...in a not empty value section"

    # suffix that marks a state as escaped:
    escaped = "...and escaped"

    @staticmethod
    def escape(state: str) -> str:
        """Returns the escaped equivalent of an unescaped state.
        Asserts that the given state is not escaped yet."""
        already_escaped = States.is_escaped(state)
        assert not already_escaped
        escaped_state = state + States.escaped
        return escaped_state

    @staticmethod
    def unescape(state: str) -> str:
        """Returns the unescaped equivalent of an escaped state.
        Asserts that the given state is escaped."""
        currently_escaped = States.is_escaped(state)
        assert currently_escaped
        # cut off the escapement suffix:
        return state[:-len(States.escaped)]

    @staticmethod
    def is_escaped(state: str) -> bool:
        """Checks whether the given state is an escaped state, i.e. whether it ends with `States.escaped`."""
        suffix = States.escaped
        return state.endswith(suffix)

    @staticmethod
    def switch_escapement(state: str) -> str:
        """Toggles the escapement of a state: an escaped state is unescaped, an unescaped state is escaped."""
        if States.is_escaped(state):
            return States.unescape(state)
        return States.escape(state)

Class variables

var not_within_tags
var in_empty_section
var in_not_empty_section
var in_section_with_one_finished_word
var in_empty_value_section
var in_not_empty_value_section
var escaped

Static methods

def escape(state: str) ‑> str

Converts an unescaped state to its escaped equivalent.

Expand source code Browse git

@staticmethod
def escape(state: str) -> str:
    """Returns the escaped equivalent of an unescaped state.
    Asserts that the given state is not escaped yet."""
    already_escaped = States.is_escaped(state)
    assert not already_escaped
    escaped_state = state + States.escaped
    return escaped_state

def unescape(state: str) ‑> str

Convert an escaped state to its unescaped equivalent.

Expand source code Browse git

@staticmethod
def unescape(state: str) -> str:
    """Returns the unescaped equivalent of an escaped state.
    Asserts that the given state is escaped."""
    currently_escaped = States.is_escaped(state)
    assert currently_escaped
    # cut off the escapement suffix:
    return state[:-len(States.escaped)]

def is_escaped(state: str) ‑> bool

Checks if the current char is an escaped char, i.e. whether the given state ends with the "escaped" suffix.

Expand source code Browse git

@staticmethod
def is_escaped(state: str) -> bool:
    """Checks whether the given state is an escaped state, i.e. whether it ends with `States.escaped`."""
    suffix = States.escaped
    return state.endswith(suffix)

def switch_escapement(state: str) ‑> str

Returns the escaped or unescaped state of the given state, depending on whether it is currently escaped or not.

Expand source code Browse git

@staticmethod
def switch_escapement(state: str) -> str:
    """Toggles the escapement of a state: an escaped state is unescaped, an unescaped state is escaped."""
    if States.is_escaped(state):
        return States.unescape(state)
    return States.escape(state)

class Transitions

Functions that modify (partially finished) ParsedTemplate data structures based on an added character. These functions are used by StateTransitioner for its finite state machine.

All of these functions take a (partially finished) ParsedTemplate and a character and return the resulting (possibly just partially finished) modified ParsedTemplate. They might possibly still perform in-place operations on the ParsedTemplate they receive, though.

Expand source code Browse git

class Transitions:
    """Functions that update a (partially finished) `ParsedTemplate` for one newly read character.
    `StateTransitioner` refers to these functions in its finite state machine.

    Every function receives the (partially finished) `ParsedTemplate` `r` and the character `c`, and returns the
    resulting (possibly still unfinished) `ParsedTemplate`.
    The functions modify the `ParsedTemplate` they receive in place and return that same object."""

    @staticmethod
    def do_nothing(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Returns the template unchanged; `c` is ignored."""
        return r

    # add character to the template:

    @staticmethod
    def add_to_text(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Appends `c` to the text segment at the end of the template."""
        r[-1] += c
        return r

    @staticmethod
    def add_to_section_type(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Appends `c` to the section type of the last section of the tag at the end of the template."""
        tag = r[-1]
        # sections are tuples, so the last one is replaced rather than modified:
        section_type, section_values = tag.pop()
        tag.append((section_type + c, section_values))
        return r

    @staticmethod
    def add_to_section_value(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Appends `c` to the last value of the last section of the tag at the end of the template."""
        values_of_last_section = r[-1][-1][1]
        values_of_last_section[-1] += c
        return r

    # start something new in the template:

    @staticmethod
    def start_new_tag(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Appends a new tag to the template; the tag consists of one section with empty type and no values."""
        empty_section = ("", [])
        r.append([empty_section])
        return r

    @staticmethod
    def start_new_section_and_convert_section_type_to_section_value(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Turns the type of the last section into that section's only value (leaving the section untyped) and
        appends a new empty section to the tag at the end of the template."""
        tag = r[-1]
        supposed_type = tag.pop()[0]
        tag.extend([("", [supposed_type]), ("", [])])
        return r

    @staticmethod
    def start_first_section_value_in_typed_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Begins a new section value consisting of `c` in the last section, which has an explicitly specified type."""
        values_of_last_section = r[-1][-1][1]
        values_of_last_section.append(c)
        return r

    @staticmethod
    def start_second_section_value_in_typeless_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Replaces the last section by an un-typed section whose first value is what was thought to be the section
        type, and whose second value is `c`."""
        tag = r[-1]
        first_value = tag.pop()[0]
        tag.append(("", [first_value, c]))
        return r

    @staticmethod
    def start_new_section_value_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Appends a new empty value to the last section of the tag at the end of the template, unless that section
        already ends with an empty value."""
        values_of_last_section: List[str] = r[-1][-1][1]
        if values_of_last_section[-1] == "":
            return r
        values_of_last_section.append("")
        return r

    @staticmethod
    def start_new_section_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Removes the last value of the last section if that value is empty, then appends a new empty section to the
        tag at the end of the template."""
        tag = r[-1]
        values_of_last_section: List[str] = tag[-1][1]
        if values_of_last_section[-1] == "":
            values_of_last_section.pop()
        tag.append(("", []))
        return r

    # close things in the tag:

    @staticmethod
    def end_tag_after_typeless_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Closes a tag whose last section has no specified type: what was collected as the section's type becomes its
        only value, and a new empty text segment is appended behind the tag."""
        tag = r[-1]
        only_value = tag.pop()[0]
        tag.append(("", [only_value]))
        r.append("")
        return r

    @staticmethod
    def end_tag_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
        """Closes a tag: removes the last value of its last section if that value is empty, then appends a new empty
        text segment behind the tag."""
        values_of_last_section: List[str] = r[-1][-1][1]
        if values_of_last_section[-1] == "":
            values_of_last_section.pop()
        r.append("")
        return r

Static methods

def do_nothing(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Does nothing to the template.

Expand source code Browse git

@staticmethod
def do_nothing(r: ParsedTemplate, c: str) -> ParsedTemplate:
    """Returns the template unchanged; `c` is ignored."""
    unchanged = r
    return unchanged

def add_to_text(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Adds a character to the text segment the template ends with.

Expand source code Browse git

@staticmethod
def add_to_text(r: ParsedTemplate, c: str) -> ParsedTemplate:
    """Appends `c` to the text segment at the end of the template."""
    last_index = -1
    r[last_index] += c
    return r

def add_to_section_type(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Adds a character to the section type of the last section of the tag the template ends with.

Expand source code Browse git

@staticmethod
def add_to_section_type(r: ParsedTemplate, c: str) -> ParsedTemplate:
    """Appends `c` to the section type of the last section of the tag at the end of the template."""
    tag = r[-1]
    # sections are tuples, so the last one is replaced rather than modified:
    section_type, section_values = tag.pop()
    tag.append((section_type + c, section_values))
    return r

def add_to_section_value(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Adds a character to the last value of the last section of the tag the template ends with.

Expand source code Browse git

@staticmethod
def add_to_section_value(r: ParsedTemplate, c: str) -> ParsedTemplate:
    """Appends `c` to the last value of the last section of the tag at the end of the template."""
    values_of_last_section = r[-1][-1][1]
    values_of_last_section[-1] += c
    return r

def start_new_tag(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Adds a new empty tag to the end of the template.

Expand source code Browse git

@staticmethod
def start_new_tag(r: ParsedTemplate, c: str) -> ParsedTemplate:
    """Appends a new tag to the template; the tag consists of one section with empty type and no values."""
    empty_section = ("", [])
    r.append([empty_section])
    return r

def start_new_section_and_convert_section_type_to_section_value(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

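Converts the type of the last section into that section's only value (making the section un-typed) and adds a new empty section to the end of the tag the template ends with.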

Expand source code Browse git

@staticmethod
def start_new_section_and_convert_section_type_to_section_value(r: ParsedTemplate, c: str) -> ParsedTemplate:
    """Turns the type of the last section into that section's only value (leaving the section untyped) and
    appends a new empty section to the tag at the end of the template."""
    tag = r[-1]
    supposed_type = tag.pop()[0]
    tag.extend([("", [supposed_type]), ("", [])])
    return r

def start_first_section_value_in_typed_section(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Starts a new section value with its first character c in a section with explicitly specified type.

Expand source code Browse git

@staticmethod
def start_first_section_value_in_typed_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
    """Begins a new section value consisting of `c` in the last section, which has an explicitly specified type."""
    values_of_last_section = r[-1][-1][1]
    values_of_last_section.append(c)
    return r

def start_second_section_value_in_typeless_section(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Adds a new section value c to the end of the tag the template ends with, after discovering that what was thought to be the section type was actually the first value of an un-typed section.

Expand source code Browse git

@staticmethod
def start_second_section_value_in_typeless_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
    """Replaces the last section by an un-typed section whose first value is what was thought to be the section
    type, and whose second value is `c`."""
    tag = r[-1]
    first_value = tag.pop()[0]
    tag.append(("", [first_value, c]))
    return r

def start_new_section_value_and_delete_last_sections_value_if_empty(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Adds a new empty value to the last section of the tag the template ends with, if there is no empty value at its end yet.

Expand source code Browse git

@staticmethod
def start_new_section_value_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
    """Appends a new empty value to the last section of the tag at the end of the template, unless that section
    already ends with an empty value."""
    values_of_last_section: List[str] = r[-1][-1][1]
    if values_of_last_section[-1] == "":
        return r
    values_of_last_section.append("")
    return r

def start_new_section_and_delete_last_sections_value_if_empty(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Adds a new section to the tag the template ends with, and deletes the last value of the last section if it is empty.

Expand source code Browse git

@staticmethod
def start_new_section_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
    """Removes the last value of the last section if that value is empty, then appends a new empty section to the
    tag at the end of the template."""
    tag = r[-1]
    values_of_last_section: List[str] = tag[-1][1]
    if values_of_last_section[-1] == "":
        values_of_last_section.pop()
    tag.append(("", []))
    return r

def end_tag_after_typeless_section(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Ends a tag (and adds a new text segment behind it) which ends with a section without a specified type.

Expand source code Browse git

@staticmethod
def end_tag_after_typeless_section(r: ParsedTemplate, c: str) -> ParsedTemplate:
    """Closes a tag whose last section has no specified type: what was collected as the section's type becomes its
    only value, and a new empty text segment is appended behind the tag."""
    tag = r[-1]
    only_value = tag.pop()[0]
    tag.append(("", [only_value]))
    r.append("")
    return r

def end_tag_and_delete_last_sections_value_if_empty(r: List[Union[str, List[Tuple[str, List[str]]]]], c: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Ends a tag and deletes its last section's last value if it is empty.

Expand source code Browse git

@staticmethod
def end_tag_and_delete_last_sections_value_if_empty(r: ParsedTemplate, c: str) -> ParsedTemplate:
    """Closes a tag: removes the last value of its last section if that value is empty, then appends a new empty
    text segment behind the tag."""
    values_of_last_section: List[str] = r[-1][-1][1]
    if values_of_last_section[-1] == "":
        values_of_last_section.pop()
    r.append("")
    return r

class StateTransitioner

Translates between states using a finite state machine. This does not take into account the ability to escape characters.

Expand source code Browse git

class StateTransitioner:
    """Moves from one state to the next by means of a finite state machine.
    Escaped characters are not handled here."""

    state_transitions: Dict[str, Dict[str, Tuple[str, Callable[[ParsedTemplate, str], ParsedTemplate]]]] = {
        States.not_within_tags: {
            "{": (States.in_empty_section, Transitions.start_new_tag),
            Chars.ws: (States.not_within_tags, Transitions.add_to_text),
            Chars.char: (States.not_within_tags, Transitions.add_to_text),
            ":": (States.not_within_tags, Transitions.add_to_text),
            "*": (States.not_within_tags, Transitions.add_to_text),
        },
        States.in_empty_section: {
            Chars.ws: (States.in_empty_section, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_section, Transitions.add_to_section_type),
        },
        States.in_not_empty_section: {
            ":": (States.in_empty_value_section, Transitions.do_nothing),
            "*": (States.in_empty_section,
                  Transitions.start_new_section_and_convert_section_type_to_section_value),
            "}": (States.not_within_tags, Transitions.end_tag_after_typeless_section),
            Chars.ws: (States.in_section_with_one_finished_word, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_section, Transitions.add_to_section_type),
        },
        States.in_section_with_one_finished_word: {
            ":": (States.in_empty_value_section, Transitions.do_nothing),
            "*": (States.in_empty_section,
                  Transitions.start_new_section_and_convert_section_type_to_section_value),
            "}": (States.not_within_tags, Transitions.end_tag_after_typeless_section),
            Chars.ws: (States.in_section_with_one_finished_word, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_value_section,
                         Transitions.start_second_section_value_in_typeless_section),
        },
        States.in_empty_value_section: {
            Chars.ws: (States.in_empty_value_section, Transitions.do_nothing),
            Chars.char: (States.in_not_empty_value_section,
                         Transitions.start_first_section_value_in_typed_section),
        },
        States.in_not_empty_value_section: {
            "*": (States.in_empty_section,
                  Transitions.start_new_section_and_delete_last_sections_value_if_empty),
            "}": (States.not_within_tags,
                  Transitions.end_tag_and_delete_last_sections_value_if_empty),
            Chars.ws: (States.in_not_empty_value_section,
                       Transitions.start_new_section_value_and_delete_last_sections_value_if_empty),
            Chars.char: (States.in_not_empty_value_section, Transitions.add_to_section_value),
        },
    }
    """The template syntax as a finite state machine: for a state s1 and a character type t (as returned by
    `Chars.type`), `state_transitions[s1][t]` is a tuple `(s2, f)` where s2 is the following state and f is the
    function that is applied to the parsed data to include the character in it.
    A character type without an entry for a state is not allowed in that state."""

    @staticmethod
    def transition_state(state: str, char: str) -> Tuple[str, Callable[[ParsedTemplate, str], ParsedTemplate]]:
        """Looks up the transition for the given state and the given character.
        Returns the next state together with the function that takes the list representation of the already-parsed
        data and the character, and returns the data updated for that character.
        Raises `errors.SyntaxError` if the character's type may not occur in the given state."""
        type_of_char = Chars.type(char)
        transition = StateTransitioner.state_transitions[state].get(type_of_char)
        if transition is None:
            # state names start with "...", which is cut off for the error message:
            readable_state = state[3:]
            raise errors.SyntaxError("Parsing error: \"" + type_of_char + "\" may not occur if it is " + readable_state)
        return transition

Class variables

var state_transitions : Dict[str, Dict[str, Tuple[str, Callable[[List[Union[str, List[Tuple[str, List[str]]]]], str], List[Union[str, List[Tuple[str, List[str]]]]]]]]]: A data structure describing the template syntax as a finite state machine in which, for every state s1 and every character c, state_transitions[s1][c] contains a tuple (s2, f) with s2 as the following state and f as the function that is applied to the parsed data to include c in it.

Static methods

def transition_state(state: str, char: str) ‑> Tuple[str, Callable[[List[Union[str, List[Tuple[str, List[str]]]]], str], List[Union[str, List[Tuple[str, List[str]]]]]]]

For a given state s and a given character c, returns the next state s2 and a function that takes a list representation of the already-parsed data and c and returns a modified, extended duplicate of the data based on c.

Expand source code Browse git

@staticmethod
def transition_state(state: str, char: str) -> Tuple[str, Callable[[ParsedTemplate, str], ParsedTemplate]]:
    """Looks up the transition for the given state and the given character.
    Returns the next state together with the function that takes the list representation of the already-parsed
    data and the character, and returns the data updated for that character.
    Raises `errors.SyntaxError` if the character's type may not occur in the given state."""
    type_of_char = Chars.type(char)
    transition = StateTransitioner.state_transitions[state].get(type_of_char)
    if transition is None:
        # state names start with "...", which is cut off for the error message:
        readable_state = state[3:]
        raise errors.SyntaxError("Parsing error: \"" + type_of_char + "\" may not occur if it is " + readable_state)
    return transition

class SectionTypes

Encapsulates a mapping of priorities to section types and methods to assign section types to un-typed sections.

Expand source code Browse git

class SectionTypes:
    """Encapsulates a mapping of priorities to section types and methods to assign section types to un-typed
    sections."""

    section_types_w_priorities = [
        ("context", 1000., True),
        ("id", 950., False),
        ("capitalization", 900., False)
    ]
    """All supported section types as a list of tuples in the form of (name, priority, can_have_multiple_values)"""

    @staticmethod
    def section_type_accepts_multiple_values(section_type: str) -> bool:
        """Checks whether a section type can have multiple whitespace-separated values.
        Returns False for section types that do not exist."""
        return any(
            name == section_type and multiple_values is True
            for name, _, multiple_values in SectionTypes.section_types_w_priorities
        )

    @staticmethod
    def section_type_exists(section_type: str) -> bool:
        """Checks if a section type exists."""
        return any(name == section_type for name, _, _ in SectionTypes.section_types_w_priorities)

    @staticmethod
    def create_section_types_for_untyped_tag(section_types: List[str]) -> List[str]:
        """Receives a list of section types in a tag (in chronological order) and assigns section types to those
        sections without a section type (given as ""), in accordance with the priorities of section types.
        Un-typed sections are filled from the right to the left: the rightmost un-typed section receives the
        unused section type with the highest priority, the next one to its left the second-highest, and so on.
        Returns the typed section type list.
        Raises `errors.SyntaxPostprocessingError` if the tag has more sections than there are section types, if a
        section type is used twice or does not exist, or if the result contains no context section."""
        if len(section_types) > len(SectionTypes.section_types_w_priorities):
            raise errors.SyntaxPostprocessingError("Tag contains more sections than there are section types.")

        # get all explicitly specified section types into a set:
        already_used = set()
        for section_type in section_types:
            if section_type == "":
                continue
            if section_type in already_used:
                raise errors.SyntaxPostprocessingError("Section type \"" + section_type + "\" used twice in a tag.")
            if not SectionTypes.section_type_exists(section_type):
                raise errors.SyntaxPostprocessingError("Section type \"" + section_type + "\" does not exist.")
            already_used.add(section_type)

        # unused section types in ascending priority, so that pop() yields the highest remaining priority:
        available_sections_types = sorted(
            (s for s in SectionTypes.section_types_w_priorities if s[0] not in already_used),
            key=lambda s: s[1]
        )

        # iterate over all declared section types from the right to the left; the length check above guarantees
        # that there is an unused section type left for every un-typed section:
        result = [
            section_type if section_type != "" else available_sections_types.pop()[0]
            for section_type in reversed(section_types)
        ]
        result.reverse()  # restore chronological order

        # raise an error if there is no context value:
        if "context" not in result:
            raise errors.SyntaxPostprocessingError("Tag misses a \"context\"-section.")

        return result

Class variables

var section_types_w_priorities: All supported section types as a list of tuples in the form of (name, priority, can_have_multiple_values)

Static methods

def section_type_accepts_multiple_values(section_type: str) ‑> bool

Checks whether a section type can have multiple whitespace-separated values.

Expand source code Browse git

@staticmethod
def section_type_accepts_multiple_values(section_type: str) -> bool:
    """Checks whether a section type can have multiple whitespace-separated values.
    Returns False for section types that do not exist."""
    # a matching entry implies that the section type exists, so no separate existence check is needed:
    return any(
        name == section_type and multiple_values is True
        for name, _, multiple_values in SectionTypes.section_types_w_priorities
    )

def section_type_exists(section_type: str) ‑> bool

Checks if a section type exists.

Expand source code Browse git

@staticmethod
def section_type_exists(section_type: str) -> bool:
    """Checks if a section type exists, i.e. if it is the name of an entry of
    `SectionTypes.section_types_w_priorities`."""
    return any(name == section_type for name, _, _ in SectionTypes.section_types_w_priorities)

def create_section_types_for_untyped_tag(section_types: List[str]) ‑> List[str]

Receives a list of section types in a tag (in chronological order) and assigns section types to those sections without a section type, in accordance with the priorities of section types and the specification. Returns the typed section type list. Raises errors if section matching cannot be done, or if no context section could be found.

Expand source code Browse git

@staticmethod
def create_section_types_for_untyped_tag(section_types: List[str]) -> List[str]:
    """Receives a list of section types in a tag (in chronological order) and assigns section types to those
    sections without a section type (given as ""), in accordance with the priorities of section types.
    Un-typed sections are filled from the right to the left: the rightmost un-typed section receives the
    unused section type with the highest priority, the next one to its left the second-highest, and so on.
    Returns the typed section type list.
    Raises `errors.SyntaxPostprocessingError` if the tag has more sections than there are section types, if a
    section type is used twice or does not exist, or if the result contains no context section."""
    if len(section_types) > len(SectionTypes.section_types_w_priorities):
        raise errors.SyntaxPostprocessingError("Tag contains more sections than there are section types.")

    # get all explicitly specified section types into a set:
    already_used = set()
    for section_type in section_types:
        if section_type == "":
            continue
        if section_type in already_used:
            raise errors.SyntaxPostprocessingError("Section type \"" + section_type + "\" used twice in a tag.")
        if not SectionTypes.section_type_exists(section_type):
            raise errors.SyntaxPostprocessingError("Section type \"" + section_type + "\" does not exist.")
        already_used.add(section_type)

    # unused section types in ascending priority, so that pop() yields the highest remaining priority:
    available_sections_types = sorted(
        (s for s in SectionTypes.section_types_w_priorities if s[0] not in already_used),
        key=lambda s: s[1]
    )

    # iterate over all declared section types from the right to the left; the length check above guarantees
    # that there is an unused section type left for every un-typed section:
    result = [
        section_type if section_type != "" else available_sections_types.pop()[0]
        for section_type in reversed(section_types)
    ]
    result.reverse()  # restore chronological order

    # raise an error if there is no context value:
    if "context" not in result:
        raise errors.SyntaxPostprocessingError("Tag misses a \"context\"-section.")

    return result

class GRParser

Unites several static methods of a pipeline for parsing gender*render templates from strings into a list format and refining this representation to the maximum extent possible without additionally seeing the corresponding gender*render pronoun data.

These functions are written to be executed in the order they are called by full_parsing_pipeline, and may or may not behave as expected when called on a value that didn't go through the other functions first.

Expand source code Browse git

class GRParser:
    """Unites several static methods of a pipeline for parsing gender*render templates from strings into a list format
    and refining this representation to the maximum extent possible without additionally seeing the corresponding
    gender*render pronoun data.

    These functions are written to be executed in the order they are called by `full_parsing_pipeline`, and may or may
    not behave as expected when called on a value that didn't go through the other functions first."""

    @staticmethod
    def parse_gr_template_from_str(template: str) -> ParsedTemplate:
        """Takes a gender*render template as a string and returns it as an easily readable list representation.
        This does only do syntactic parsing in accordance to the defining finite state machine;
        further steps in the parsing pipeline are implemented by other methods of this parser.

        The resulting output is of the following structure:
        * value of a section: represented by a list of strings
        * type of a section: represented by a string
        * section: tuple of type representation and value representation
        * tag: list of section representations
        * template: list in which every element at an odd index represents a tag and every element at an even index
          is a string

        Special characters are all unescaped in the parsed version of the template.

        Raises errors.SyntaxError if a state transition is rejected, if the template ends directly after an escape
        character, or if the template ends while a tag is still open."""

        result = [""]
        s = States.not_within_tags
        line_no = 1
        char_no = 1
        # iterate over all characters:
        for c in template:
            # update the position used for SyntaxError raising (this happens before the character is processed):
            if c == "\n":
                line_no += 1
                char_no = 1
            else:
                char_no += 1

            # the character type is needed both for logging and for the state machine, so determine it only once:
            type_of_char = Chars.type(c)

            # log:
            warnings.WarningManager.raise_warning(
                "result: " + str(result) + "\n\n"
                + "c: \"" + c + "\"\n"
                + "s: " + s + "\n"
                + "char type: " + type_of_char,
                warnings.GRSyntaxParsingLogging)

            # do the work of the finite state machine:
            if States.is_escaped(s):
                # an escaped character is always handled via the transition for non-special characters:
                s = States.unescape(s)
                s, processing_function = StateTransitioner.state_transitions[s][Chars.char]
                result = processing_function(result, c)
            elif type_of_char == Chars.escape_char:
                s = States.escape(s)
            else:
                try:
                    s, processing_function = StateTransitioner.transition_state(s, c)
                    result = processing_function(result, c)
                except errors.SyntaxError:
                    # re-raise with information about where in the template the error occurred:
                    raise errors.SyntaxError(
                        "The given gender*render template has invalid syntax.",
                        ("unknown file", line_no, char_no, template.split("\n")[line_no - 1])
                    )

        # raise an error if the template ends improperly:
        if States.is_escaped(s):
            raise errors.SyntaxError("The template ends with an unescaped escape character, please escape it.",
                                     ("unknown file", line_no, char_no, template.split("\n")[-1]))
        elif s != States.not_within_tags:
            raise errors.SyntaxError("A tag opens, but is not finished properly.",
                                     ("unknown file", line_no, char_no, template.split("\n")[-1]))

        return result

    @staticmethod
    def assign_types_to_all_sections(parsed_template: ParsedTemplate) -> ParsedTemplate:
        """Takes a parsed template (as it is created by all methods of GRParser) and assigns every section of undefined
        type a section type.
        The given template is not modified; a modified deep copy is returned."""
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(result), 2):
            old_section_types: List[str] = [section[0] for section in result[i]]
            new_section_types: List[str] = SectionTypes.create_section_types_for_untyped_tag(old_section_types)
            # keep the values of every section, but pair them with the (possibly newly assigned) section type:
            result[i] = [(new_section_types[s], result[i][s][1]) for s in range(len(new_section_types))]
        return result

    @staticmethod
    def split_tags_with_multiple_context_values(parsed_template: ParsedTemplate) -> ParsedTemplate:
        """Takes a parsed template (as it is created by all methods of GRParser) and splits every tag into a sequence of
        tags, one for every context value of the tag. The resulting tags are separated by single spaces.
        This assumes that every section was already assigned a type by GRParser.assign_types_to_all_sections, and may
        lead to wrong results otherwise.
        The context section is left at the end of the tag by this procedure.
        The given template is not modified; a modified deep copy is returned."""
        result = copy.deepcopy(parsed_template)
        # iterate from the back so that inserting additional elements does not shift the tags not yet visited:
        for i in reversed(range(1, len(result), 2)):
            tag_without_context_section = [section for section in result[i] if section[0] != "context"]
            tag_but_only_context_section = [section for section in result[i] if section[0] == "context"]

            # split tag into one tag for every context value:
            context_values = tag_but_only_context_section.pop()[1]
            sequence_of_tags = [
                (copy.deepcopy(tag_without_context_section) + [("context", [context_value])])
                for context_value in context_values
            ]
            # put a space between the tags, which also keeps tags at odd and strings at even indices:
            for j in reversed(range(1, len(sequence_of_tags))):
                sequence_of_tags.insert(j, " ")
            result[i:i+1] = sequence_of_tags

        return result

    @staticmethod
    def make_sure_that_sections_dont_exceed_allowed_amount_of_values(parsed_template: ParsedTemplate) -> ParsedTemplate:
        """Takes a parsed template (as it is created by all methods of GRParser) and raises an error if any section
        that does not allow multiple values has multiple values. This should always be used before calling
        convert_tags_to_indexable_dicts.
        Returns the given parsed template (the very same object) afterwards.

        Raises errors.SyntaxPostprocessingError for the first offending section that is found."""
        for i in range(1, len(parsed_template), 2):  # tags are located at the odd indices
            for section_type, section_values in parsed_template[i]:
                if SectionTypes.section_type_accepts_multiple_values(section_type):
                    continue
                elif len(section_values) > 1:
                    # floor division keeps the tag number an integer ("Tag no. 1" rather than "Tag no. 1.0"):
                    raise errors.SyntaxPostprocessingError("Tag no. " + str((i + 1) // 2) + " (\""
                                                           + ReGRParser.unparse_gr_tag(parsed_template[i])
                                                           + "\") has multiple values in \""
                                                           + section_type +
                                                           "\"-section even though this type of section does"
                                                           + " not support this.")
        return parsed_template

    @staticmethod
    def convert_tags_to_indexable_dicts(parsed_template: ParsedTemplate) -> ParsedTemplateRefined:
        """Takes a parsed template (as it is created by all methods of GRParser) and converts every tag from a
        representation a la "[(a, b), (c, d)]" to a representation a la "{a: b, c: d}".
        This makes the value of specific types of sections easier to access by other methods.
        Note that the result returned by this method is different in that it is not accepted by the methods of
        GRParser that come before this one in the pipeline.

        This method does not check the amount of values of a section: for sections that accept only one value, as well
        as for the context section, only the first value is kept. Call
        make_sure_that_sections_dont_exceed_allowed_amount_of_values first to have superfluous values reported.
        The given template is not modified; a modified deep copy is returned."""
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(result), 2):  # tags are located at the odd indices
            new_tag = dict()
            for section_type, section_values in result[i]:
                if not SectionTypes.section_type_accepts_multiple_values(section_type) or section_type == "context":
                    new_tag[section_type] = section_values[0]
                else:
                    new_tag[section_type] = section_values
            result[i] = new_tag

        return result

    @staticmethod
    def set_capitalization_value_for_all_tags(parsed_template: ParsedTemplateRefined) -> ParsedTemplateRefined:
        """Takes a parsed template as returned by `GRParser.convert_tags_to_indexable_dicts` and makes sure every
        tag has a capitalization value.
        The given template is not modified; a modified deep copy is returned."""
        # ToDo: Test this function!
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(result), 2):
            global_capitalization_system.assign_and_check_capitalization_value_of_tag(result[i])
        return result

    @staticmethod
    def convert_context_values_to_canonicals(parsed_template: ParsedTemplateRefined) -> ParsedTemplateRefined:
        """Converts a parsed template as returned by `GRParser.convert_tags_to_indexable_dicts` to a parsed template
        where every context value is canonical.
        The given template is not modified; a modified deep copy is returned."""
        result = copy.deepcopy(parsed_template)
        for i in range(1, len(result), 2):
            result[i]["context"] = handle_context_values.ContextValues.get_canonical(result[i]["context"])
        return result

    @staticmethod
    def full_parsing_pipeline(template: str) -> ParsedTemplateRefined:
        """Walks template through the full parsing pipeline defined by `GRParser`, and returns the result."""
        template = GRParser.parse_gr_template_from_str(template)
        template = GRParser.assign_types_to_all_sections(template)
        template = GRParser.split_tags_with_multiple_context_values(template)
        template = GRParser.make_sure_that_sections_dont_exceed_allowed_amount_of_values(template)
        template = GRParser.convert_tags_to_indexable_dicts(template)
        template = GRParser.set_capitalization_value_for_all_tags(template)
        template = GRParser.convert_context_values_to_canonicals(template)
        return template

    @staticmethod
    def get_all_specified_id_values(parsed_template: ParsedTemplateRefined) -> FrozenSet[str]:
        """Returns a frozen set of all id values explicitly specified by tags in the parsed template."""
        return frozenset(
            parsed_template[i]["id"] for i in range(1, len(parsed_template), 2) if "id" in parsed_template[i]
        )

    @staticmethod
    def template_contains_unspecified_ids(parsed_template: ParsedTemplateRefined) -> bool:
        """Returns whether the parsed template contains tags with unspecified id value."""
        return any("id" not in parsed_template[i] for i in range(1, len(parsed_template), 2))

Static methods

def parse_gr_template_from_str(template: str) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Takes a gender*render template as a string and returns it as an easily readable list representation. This does only do syntactic parsing in accordance to the defining finite state machine; further steps in the parsing pipeline are implemented by other methods of this parser.

The resulting output is of the following structure:

* value of a section: represented by a list of strings
* type of a section: represented by a string
* section: tuple of type representation and value representation
* tag: list of section representations
* template: list in which every element at an odd index represents a tag and every element at an even index is a string

Special characters are all unescaped in the parsed version of the template.

Expand source code Browse git

@staticmethod
def parse_gr_template_from_str(template: str) -> ParsedTemplate:
    """Parses a gender*render template given as a string into its list representation.
    Only the syntactic parsing described by the finite state machine is done here; the later steps of the parsing
    pipeline are implemented by the other methods of this parser.

    Structure of the returned value:
    * value of a section: a list of strings
    * type of a section: a string
    * section: a tuple of section type and section value
    * tag: a list of sections
    * template: a list with strings at the even and tags at the odd indices

    All special characters are unescaped in the returned representation.

    Raises errors.SyntaxError if a state transition is rejected, if the template ends directly after an escape
    character, or if the template ends while a tag is still open."""

    parsed = [""]
    state = States.not_within_tags
    line_no, char_no = 1, 1

    for c in template:
        # keep track of the position for error reporting (updated before the character is processed):
        if c != "\n":
            char_no += 1
        else:
            line_no += 1
            char_no = 1

        char_type = Chars.type(c)

        # log the current situation of the state machine:
        log_message = ("result: " + str(parsed) + "\n\n"
                       + "c: \"" + c + "\"\n"
                       + "s: " + state + "\n"
                       + "char type: " + char_type)
        warnings.WarningManager.raise_warning(log_message, warnings.GRSyntaxParsingLogging)

        if States.is_escaped(state):
            # whatever follows an escape character is handled via the transition for non-special characters:
            state = States.unescape(state)
            state, handler = StateTransitioner.state_transitions[state][Chars.char]
            parsed = handler(parsed, c)
            continue

        if char_type == Chars.escape_char:
            # remember that the next character is escaped; the escape character itself is not processed further:
            state = States.escape(state)
            continue

        try:
            state, handler = StateTransitioner.transition_state(state, c)
            parsed = handler(parsed, c)
        except errors.SyntaxError:
            # raise the error again, enriched with the position in the template:
            raise errors.SyntaxError(
                "The given gender*render template has invalid syntax.",
                ("unknown file", line_no, char_no, template.split("\n")[line_no - 1])
            )

    # the template must neither end on a pending escape character nor within a tag:
    if States.is_escaped(state):
        raise errors.SyntaxError("The template ends with an unescaped escape character, please escape it.",
                                 ("unknown file", line_no, char_no, template.split("\n")[-1]))
    if state != States.not_within_tags:
        raise errors.SyntaxError("A tag opens, but is not finished properly.",
                                 ("unknown file", line_no, char_no, template.split("\n")[-1]))

    return parsed

def assign_types_to_all_sections(parsed_template: List[Union[str, List[Tuple[str, List[str]]]]]) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Takes a parsed template (as it is created by all methods of GRParser) and assigns every section of undefined type a section type.

Expand source code Browse git

@staticmethod
def assign_types_to_all_sections(parsed_template: ParsedTemplate) -> ParsedTemplate:
    """Returns a deep copy of the given parsed template in which every section has a section type.
    The section types of a tag are determined by SectionTypes.create_section_types_for_untyped_tag from the
    section types the tag had before; the values of the sections are kept as they are."""
    typed_template = copy.deepcopy(parsed_template)
    for position in range(1, len(typed_template), 2):  # tags are located at the odd indices
        tag = typed_template[position]
        declared_types: List[str] = [section[0] for section in tag]
        assigned_types: List[str] = SectionTypes.create_section_types_for_untyped_tag(declared_types)
        # pair every assigned type with the values of the section at the same position:
        typed_template[position] = [
            (assigned_type, tag[index][1]) for index, assigned_type in enumerate(assigned_types)
        ]
    return typed_template

def split_tags_with_multiple_context_values(parsed_template: List[Union[str, List[Tuple[str, List[str]]]]]) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Takes a parsed template (as it is created by all methods of GRParser) and splits every tag into a sequence of tags, one for every context value of the tag. This assumes that every section was already assigned a type by GRParser.assign_types_to_all_sections, and may lead to wrong results otherwise. The context section is left at the end of the tag by this procedure.

Expand source code Browse git

@staticmethod
def split_tags_with_multiple_context_values(parsed_template: ParsedTemplate) -> ParsedTemplate:
    """Returns a deep copy of the given parsed template in which every tag is replaced by a sequence of tags, one
    for each of its context values, separated by single spaces.
    Every section must already have a type (see GRParser.assign_types_to_all_sections); the result may be wrong
    otherwise.
    In the resulting tags, the context section is the last section."""
    split_template = copy.deepcopy(parsed_template)
    # walk backwards so that growing the list does not move the tags that still have to be visited:
    for position in reversed(range(1, len(split_template), 2)):
        context_sections, other_sections = [], []
        for section in split_template[position]:
            if section[0] == "context":
                context_sections.append(section)
            else:
                other_sections.append(section)

        # build one tag per context value, with a space between two neighbouring tags:
        replacement = []
        for context_value in context_sections.pop()[1]:
            if replacement:
                replacement.append(" ")
            replacement.append(copy.deepcopy(other_sections) + [("context", [context_value])])
        split_template[position:position + 1] = replacement

    return split_template

def make_sure_that_sections_dont_exceed_allowed_amount_of_values(parsed_template: List[Union[str, List[Tuple[str, List[str]]]]]) ‑> List[Union[str, List[Tuple[str, List[str]]]]]

Takes a parsed template (as it is created by all methods of GRParser) and raises an error if any section that does not allow multiple values has multiple values. This should always be used before calling convert_tags_to_indexable_dicts. Returns the given parsed template afterwards.

Expand source code Browse git

@staticmethod
def make_sure_that_sections_dont_exceed_allowed_amount_of_values(parsed_template: ParsedTemplate) -> ParsedTemplate:
    """Takes a parsed template (as it is created by all methods of GRParser) and raises an error if any section
    that does not allow multiple values has multiple values. This should always be used before calling
    convert_tags_to_indexable_dicts.
    Returns the given parsed template (the very same object) afterwards.

    Raises errors.SyntaxPostprocessingError for the first offending section that is found."""
    for i in range(1, len(parsed_template), 2):  # tags are located at the odd indices
        for section_type, section_values in parsed_template[i]:
            if SectionTypes.section_type_accepts_multiple_values(section_type):
                continue
            elif len(section_values) > 1:
                # floor division keeps the tag number an integer ("Tag no. 1" rather than "Tag no. 1.0"):
                raise errors.SyntaxPostprocessingError("Tag no. " + str((i + 1) // 2) + " (\""
                                                       + ReGRParser.unparse_gr_tag(parsed_template[i])
                                                       + "\") has multiple values in \""
                                                       + section_type +
                                                       "\"-section even though this type of section does"
                                                       + " not support this.")
    return parsed_template

def convert_tags_to_indexable_dicts(parsed_template: List[Union[str, List[Tuple[str, List[str]]]]]) ‑> List[Union[str, Dict[str, Union[str, List[str], GenderedNoun]]]]

Takes a parsed template (as it is created by all methods of GRParser) and converts every tag from a representation a la "[(a, b), (c, d)]" to a representation a la "{a: b, c: d}". This makes the value of specific types of sections easier to access by other methods. Note that the result returned by this method is different in that it is not accepted by the other methods of GRParser, and that this method should thus be the last method in this pipeline to be used. This method does not check the number of values itself: for sections that accept only one value (and for the context section), only the first value is kept, so make_sure_that_sections_dont_exceed_allowed_amount_of_values should be called beforehand.

Expand source code Browse git

@staticmethod
def convert_tags_to_indexable_dicts(parsed_template: ParsedTemplate) -> ParsedTemplateRefined:
    """Returns a deep copy of the given parsed template in which every tag is a dict that maps section types to
    section values ("{a: b, c: d}") instead of a list of sections ("[(a, b), (c, d)]").
    This makes the value of a section of a specific type easier to access.
    The tags of the result have a different shape than the tags expected by the methods that come before this one
    in the pipeline, so those methods should not be called on the result.

    The amount of values is not checked here: for the context section and for every section type that does not
    accept multiple values, only the first value is stored; all other sections keep their list of values."""
    refined = copy.deepcopy(parsed_template)
    for position in range(1, len(refined), 2):  # tags are located at the odd indices
        tag_as_dict = {}
        for section_type, section_values in refined[position]:
            keeps_all_values = (SectionTypes.section_type_accepts_multiple_values(section_type)
                                and section_type != "context")
            tag_as_dict[section_type] = section_values if keeps_all_values else section_values[0]
        refined[position] = tag_as_dict

    return refined

def set_capitalization_value_for_all_tags(parsed_template: List[Union[str, Dict[str, Union[str, List[str], GenderedNoun]]]]) ‑> List[Union[str, Dict[str, Union[str, List[str], GenderedNoun]]]]

Takes a parsed template as returned by GRParser.convert_tags_to_indexable_dicts() and makes sure every tag has a capitalization value.

Expand source code Browse git

@staticmethod
def set_capitalization_value_for_all_tags(parsed_template: ParsedTemplateRefined) -> ParsedTemplateRefined:
    """Returns a deep copy of a parsed template (as returned by `GRParser.convert_tags_to_indexable_dicts`) after
    passing every tag of the copy to
    `global_capitalization_system.assign_and_check_capitalization_value_of_tag`, so that every tag has a
    capitalization value."""
    # ToDo: Test this function!
    with_capitalization = copy.deepcopy(parsed_template)
    # the slice contains the tag objects of the copy themselves, which are handed over to be updated:
    for tag in with_capitalization[1::2]:
        global_capitalization_system.assign_and_check_capitalization_value_of_tag(tag)
    return with_capitalization

def convert_context_values_to_canonicals(parsed_template: List[Union[str, Dict[str, Union[str, List[str], GenderedNoun]]]]) ‑> List[Union[str, Dict[str, Union[str, List[str], GenderedNoun]]]]

Converts a parsed template as returned by GRParser.convert_tags_to_indexable_dicts() to a parsed template where every context value is canonical.

Expand source code Browse git

@staticmethod
def convert_context_values_to_canonicals(parsed_template: ParsedTemplateRefined) -> ParsedTemplateRefined:
    """Returns a deep copy of a parsed template (as returned by `GRParser.convert_tags_to_indexable_dicts`) in which
    the context value of every tag is replaced by the result of
    `handle_context_values.ContextValues.get_canonical` for that value."""
    canonical_template = copy.deepcopy(parsed_template)
    # the slice contains the tag dicts of the copy themselves, so they are updated in place:
    for tag in canonical_template[1::2]:
        tag["context"] = handle_context_values.ContextValues.get_canonical(tag["context"])
    return canonical_template

def full_parsing_pipeline(template: str) ‑> List[Union[str, Dict[str, Union[str, List[str], GenderedNoun]]]]

Walks template through the full parsing pipeline defined by GRParser, and returns the result.

Expand source code Browse git

@staticmethod
def full_parsing_pipeline(template: str) -> ParsedTemplateRefined:
    """Sends the template through every step of the parsing pipeline defined by `GRParser`, in order, and returns
    the result of the last step."""
    pipeline = (
        GRParser.parse_gr_template_from_str,
        GRParser.assign_types_to_all_sections,
        GRParser.split_tags_with_multiple_context_values,
        GRParser.make_sure_that_sections_dont_exceed_allowed_amount_of_values,
        GRParser.convert_tags_to_indexable_dicts,
        GRParser.set_capitalization_value_for_all_tags,
        GRParser.convert_context_values_to_canonicals,
    )
    # every step receives the result of the step before it:
    for step in pipeline:
        template = step(template)
    return template

def get_all_specified_id_values(parsed_template: List[Union[str, Dict[str, Union[str, List[str], GenderedNoun]]]]) ‑> FrozenSet[str]

Returns a frozen set of all id values explicitly specified by tags in the parsed template.

Expand source code Browse git

@staticmethod
def get_all_specified_id_values(parsed_template: ParsedTemplateRefined) -> FrozenSet[str]:
    """Collects the id value of every tag of the parsed template that explicitly specifies one and returns these
    values as a frozen set."""
    specified_ids = set()
    for tag in parsed_template[1::2]:  # tags are located at the odd indices
        if "id" in tag:
            specified_ids.add(tag["id"])
    return frozenset(specified_ids)

def template_contains_unspecified_ids(parsed_template: List[Union[str, Dict[str, Union[str, List[str], GenderedNoun]]]]) ‑> bool

Returns whether the parsed template contains tags with unspecified id value.

Expand source code Browse git

@staticmethod
def template_contains_unspecified_ids(parsed_template: ParsedTemplateRefined) -> bool:
    """Tells whether at least one tag of the parsed template does not specify an id value."""
    for tag in parsed_template[1::2]:  # tags are located at the odd indices
        if "id" not in tag:
            return True
    return False

class ReGRParser

Bundles methods to get a valid gender*render template from ParsedTemplate.

Expand source code Browse git

class ReGRParser:
    """Bundles methods to get a valid gender*render template from ParsedTemplate."""

    @staticmethod
    def unparse_gr_tag(tag_representation: List[Tuple[str, List[str]]]) -> str:
        """Converts the list representation of a single tag back into a tag string.
        The sections are joined by "*" and enclosed in curly brackets; a section consists of its type followed by ":"
        (left out if the type is empty) and its values, which are joined by spaces.
        Types and values are escaped with Chars.escape_gr_string."""
        rendered_sections = []
        for section in tag_representation:
            type_prefix = (Chars.escape_gr_string(section[0]) + ":") if section[0] else ""
            joined_values = " ".join(Chars.escape_gr_string(value) for value in section[1])
            rendered_sections.append(type_prefix + joined_values)
        return "{" + "*".join(rendered_sections) + "}"

    @staticmethod
    def unparse_gr_template(parsed_template: ParsedTemplate) -> str:
        """Converts a parsed template back into a template string that corresponds to it.
        Elements at odd indices are rendered as tags by ReGRParser.unparse_gr_tag; elements at even indices are
        strings and are escaped non-strictly with Chars.escape_gr_string.
        This may be used for testing purposes or to simplify gender*render templates."""
        pieces = []
        for index, element in enumerate(parsed_template):
            if index % 2 == 0:  # is a string
                pieces.append(Chars.escape_gr_string(element, strict=False))
            else:  # is a tag
                pieces.append(ReGRParser.unparse_gr_tag(element))
        return "".join(pieces)

Static methods

def unparse_gr_tag(tag_representation: List[Tuple[str, List[str]]]) ‑> str

Expand source code Browse git

@staticmethod
def unparse_gr_tag(tag_representation: List[Tuple[str, List[str]]]) -> str:
    """Converts the list representation of a single tag back into a tag string.
    The sections are joined by "*" and enclosed in curly brackets; a section consists of its type followed by ":"
    (left out if the type is empty) and its values, which are joined by spaces.
    Types and values are escaped with Chars.escape_gr_string."""
    rendered_sections = []
    for section in tag_representation:
        type_prefix = (Chars.escape_gr_string(section[0]) + ":") if section[0] else ""
        joined_values = " ".join(Chars.escape_gr_string(value) for value in section[1])
        rendered_sections.append(type_prefix + joined_values)
    return "{" + "*".join(rendered_sections) + "}"

def unparse_gr_template(parsed_template: List[Union[str, List[Tuple[str, List[str]]]]]) ‑> str

Takes the result of any method of the GRParser class and returns a template (as a string) that corresponds to the given parsed template. This may be used for testing purposes or to simplify gender*render templates.

Expand source code Browse git

@staticmethod
def unparse_gr_template(parsed_template: ParsedTemplate) -> str:
    """Converts a parsed template back into a template string that corresponds to it.
    Elements at odd indices are rendered as tags by ReGRParser.unparse_gr_tag; elements at even indices are
    strings and are escaped non-strictly with Chars.escape_gr_string.
    This may be used for testing purposes or to simplify gender*render templates."""
    pieces = []
    for index, element in enumerate(parsed_template):
        if index % 2 == 0:  # is a string
            pieces.append(Chars.escape_gr_string(element, strict=False))
        else:  # is a tag
            pieces.append(ReGRParser.unparse_gr_tag(element))
    return "".join(pieces)