Utility functions¶

`poetry_analysis.utils` ¶

`convert_to_syllables(phonemes, ipa=False)` ¶

Turn a sequence of phonemes into syllable groups.

Source code in src/poetry_analysis/utils.py

def convert_to_syllables(phonemes: str | list, ipa: bool = False) -> list:
    """Turn a sequence of phonemes into syllable groups."""
    transcription = phonemes if isinstance(phonemes, str) else " ".join(phonemes)
    if ipa:
        ipa_str = nofabet_to_ipa(transcription)
        syllables = ipa_str.split(".")
    else:
        nofabet_syllables = nofabet_to_syllables(transcription)
        syllables = [" ".join(syll) for syll in nofabet_syllables]
    return syllables

`endswith(sequence, suffix)` ¶

Check if a sequence ends with a given suffix.

Source code in src/poetry_analysis/utils.py

def endswith(sequence: str | list[str], suffix: str) -> bool:
    """Check if a sequence ends with a given suffix."""
    if isinstance(sequence, str):
        return sequence.endswith(suffix)
    elif isinstance(sequence, list):
        last_element = sequence.copy().pop()
        if isinstance(last_element, str):
            return last_element.endswith(suffix)
        return False
    return False

`gather_stanza_annotations(func)` ¶

Decorator to apply a function to each stanza in a text.

Source code in src/poetry_analysis/utils.py

def gather_stanza_annotations(func) -> Callable:
    """Decorator to apply a function to each stanza in a text."""

    def wrapper(text: str) -> dict:
        stanzas = split_stanzas(text)
        stanza_annotations = {}
        for i, stanza in enumerate(stanzas, 1):
            stanza_text = "\n".join(stanza)
            stanza_annotations[f"stanza_{i}"] = func(stanza_text)
        return stanza_annotations

    return wrapper

`group_consecutive_numbers(nums)` ¶

Group consecutive numbers into sublists.

Examples:

>>> list_of_numbers = [1, 2, 3, 5, 6, 8, 9, 10]
>>> result = group_consecutive_numbers(list_of_numbers)
>>> print(result)
[[1, 2, 3], [5, 6], [8, 9, 10]]

Source code in src/poetry_analysis/utils.py

def group_consecutive_numbers(nums: list[int]) -> list[list[int]]:
    """Group consecutive numbers into sublists.

    Examples:
        >>> list_of_numbers = [1, 2, 3, 5, 6, 8, 9, 10]
        >>> result = group_consecutive_numbers(list_of_numbers)
        >>> print(result)
        [[1, 2, 3], [5, 6], [8, 9, 10]]
    """
    if not nums:
        return []

    nums = sorted(nums)
    result = []
    current_group = [nums[0]]

    for i in range(1, len(nums)):
        if nums[i] == nums[i - 1] + 1:
            current_group.append(nums[i])
        else:
            result.append(current_group)
            current_group = [nums[i]]

    result.append(current_group)
    return result

`is_punctuation(char)` ¶

Check if a character is a punctuation mark.

Source code in src/poetry_analysis/utils.py

def is_punctuation(char: str) -> bool:
    """Check if a character is a punctuation mark."""
    return char in PUNCTUATION_MARKS

`is_valid_onset(phonelist)` ¶

WORK IN PROGRESS Check if a sequence of characters forms a valid onset in Norwegian orthography.

Parameters:

Name	Type	Description	Default
`phonelist`	`str`	A string representing the onset (e.g., "bl", "tr").	required

Returns:

Name	Type	Description
`bool`	`bool`	True if the onset is valid, False otherwise.

Source code in src/poetry_analysis/utils.py

def is_valid_onset(phonelist: str) -> bool:
    """
    WORK IN PROGRESS
    Check if a sequence of characters forms a valid onset in Norwegian orthography.

    Args:
        phonelist (str): A string representing the onset (e.g., "bl", "tr").

    Returns:
        bool: True if the onset is valid, False otherwise.
    """
    # Define valid single consonants and consonant clusters for Norwegian
    valid_single_consonants = set("bcdfghjklmnpqrstvwxyz")
    valid_clusters = {
        "bj",
        "bl",
        "br",
        "dr",
        "dj",
        "fl",
        "fj",
        "fr",
        "gl",
        "gr",
        "gj",
        "kj",
        "kl",
        "kr",
        "kn",
        "kv",
        "pl",
        "pj",
        "pr",
        "mj",
        "nj",
        "sj",
        "sl",
        "sm",
        "sn",
        "sp",
        "st",
        "sv",
        "tr",
        "tj",
        "tl",
        "vr",
        "sk",
        "skr",
        "spr",
        "str",
        "skj",
        "gn",
        "hv",
    }

    if len(phonelist) == 1 and phonelist in valid_single_consonants:
        return True

    return phonelist in valid_clusters

`make_comparable_string(item)` ¶

Convert a list of strings into a single comparable string.

Source code in src/poetry_analysis/utils.py

def make_comparable_string(item: list | str) -> str:
    """Convert a list of strings into a single comparable string."""
    string = " ".join(item) if isinstance(item, list) else str(item)
    string = strip_punctuation(string)
    string = re.sub(r"[0123]", "", string)  # remove stress markers
    return string.casefold()

`normalize(text)` ¶

Lowercase, remove punctuation and tokenize a string of text.

Source code in src/poetry_analysis/utils.py

def normalize(text: str) -> list[str]:
    """Lowercase, remove punctuation and tokenize a string of text."""
    lowercase = text.strip().lower()
    alpanumeric_only = strip_punctuation(lowercase)
    words = tokenize(alpanumeric_only)
    return words

`split_orthographic_text_into_syllables(words)` ¶

WORK IN PROGRESS Split orthographic text into syllables using basic rules. This is a simplified implementation and may not handle all edge cases.

Parameters:

Name	Type	Description	Default
`words`	`list of str`	A list of orthographic words, already tokenized	required

Returns:

Name	Type	Description
`list`	`list`	A list of syllables for each word in the text.

Source code in src/poetry_analysis/utils.py

def split_orthographic_text_into_syllables(words: list[str]) -> list:
    """
    WORK IN PROGRESS
    Split orthographic text into syllables using basic rules.
    This is a simplified implementation and may not handle all edge cases.

    Args:
        words (list of str): A list of orthographic words, already tokenized

    Returns:
        list: A list of syllables for each word in the text.
    """
    syllables = []

    for word in words:
        word_syllables = []
        current_syllable = ""

        for i, char in enumerate(word):
            current_syllable += char

            # Check if the character is a vowel
            if char in VALID_NUCLEI:
                # Check if the next character could be part of the same nucleus
                is_not_last = i + 1 < len(word)
                if is_not_last and word[i + 1] in VALID_NUCLEI:
                    continue

                # Check if the next character could be part of a valid onset
                if is_not_last and word[i + 1] not in VALID_NUCLEI:
                    consonant_cluster = char + word[i + 1]
                    if len(consonant_cluster) > 1 and is_valid_onset(consonant_cluster):
                        continue

                # Otherwise, split the syllable
                word_syllables.append(current_syllable)
                current_syllable = ""

        # Add any remaining characters as a syllable
        if current_syllable:
            word_syllables.append(current_syllable)

        syllables.append(word_syllables)

    return syllables

`split_paragraphs(text)` ¶

Split a text into paragraphs and paragraphs into lines.

Source code in src/poetry_analysis/utils.py

def split_paragraphs(text: str) -> list:
    """Split a text into paragraphs and paragraphs into lines."""
    return [
        [line.rstrip() for line in paragraph.rstrip().splitlines()]
        for paragraph in re.split("\n{2,}", text)
        if paragraph
    ]

`split_stanzas(text)` ¶

Split a poem into stanzas and stanzas into verses.

Source code in src/poetry_analysis/utils.py

def split_stanzas(text: str) -> list:
    """Split a poem into stanzas and stanzas into verses."""
    return [[verse.rstrip() for verse in stanza.rstrip().splitlines()] for stanza in re.split("\n{2,}", text) if stanza]

`strip_punctuation(string)` ¶

Remove punctuation from a string

Source code in src/poetry_analysis/utils.py

def strip_punctuation(string: str) -> str:
    """Remove punctuation from a string"""
    alphanumstr = ""
    for char in string:
        if not is_punctuation(char):
            alphanumstr += char
    return strip_redundant_whitespace(alphanumstr)

`strip_redundant_whitespace(text)` ¶

Strip redundant whitespace and reduce it to a single space.

Source code in src/poetry_analysis/utils.py

def strip_redundant_whitespace(text: str) -> str:
    """Strip redundant whitespace and reduce it to a single space."""
    return re.sub(r"\s+", " ", text).strip()

`syllabify(transcription)` ¶

Flatten list of syllables from a list of transcribed words.

Source code in src/poetry_analysis/utils.py

def syllabify(transcription: list[list]) -> list:
    """Flatten list of syllables from a list of transcribed words."""
    syllables = [
        syll  # if syll is not None else "NONE"
        for word, pron in transcription
        for syll in convert_to_syllables(pron, ipa=False)
    ]
    return syllables

Utility functions¶

poetry_analysis.utils ¶

convert_to_syllables(phonemes, ipa=False) ¶

endswith(sequence, suffix) ¶

gather_stanza_annotations(func) ¶

group_consecutive_numbers(nums) ¶

is_punctuation(char) ¶

is_valid_onset(phonelist) ¶

make_comparable_string(item) ¶

normalize(text) ¶

split_orthographic_text_into_syllables(words) ¶

split_paragraphs(text) ¶

split_stanzas(text) ¶

strip_punctuation(string) ¶

strip_redundant_whitespace(text) ¶

syllabify(transcription) ¶

`poetry_analysis.utils` ¶

`convert_to_syllables(phonemes, ipa=False)` ¶

`endswith(sequence, suffix)` ¶

`gather_stanza_annotations(func)` ¶

`group_consecutive_numbers(nums)` ¶

`is_punctuation(char)` ¶

`is_valid_onset(phonelist)` ¶

`make_comparable_string(item)` ¶

`normalize(text)` ¶

`split_orthographic_text_into_syllables(words)` ¶

`split_paragraphs(text)` ¶

`split_stanzas(text)` ¶

`strip_punctuation(string)` ¶

`strip_redundant_whitespace(text)` ¶

`syllabify(transcription)` ¶