Alliteration¶

`poetry_analysis.alliteration` ¶

The definition of alliteration that we use here is the repetition of word-initial consonants or consonant clusters.

`count_alliteration(text)` ¶

Count the number of times the same word-initial letter occurs in a text.

Examples:

>>> text = "Sirius som seer"
>>> count_alliteration(text)
{'s': 3}

Source code in src/poetry_analysis/alliteration.py

def count_alliteration(text: str) -> dict[str, int]:
    """Count the number of times the same word-initial letter occurs in a text.

    The text is split on whitespace and the comparison is case-insensitive.
    Words whose first character is not alphabetic (punctuation, digits,
    free-standing dashes, ...) are ignored, so that only actual letters are
    reported.

    Args:
        text (str): The text to analyse.

    Returns:
        dict: Maps each lowercased initial letter that starts at least two
            words to the number of words starting with it. Letters are listed
            in order of first appearance.

    Examples:
        >>> text = "Sirius som seer"
        >>> count_alliteration(text)
        {'s': 3}
    """
    initial_counts = {}

    # str.split() without arguments never yields empty strings, so word[0] is safe.
    for word in text.split():
        initial_letter = word[0].lower()
        if not initial_letter.isalpha():
            # A leading quote, dash or digit is not a letter and cannot alliterate.
            continue
        initial_counts[initial_letter] = initial_counts.get(initial_letter, 0) + 1

    # A letter only counts as alliteration when it is repeated.
    return {letter: count for letter, count in initial_counts.items() if count > 1}

`extract_alliteration(text)` ¶

Extract words that start with the same letter from a text.

NB! This function is case-insensitive and compares e.g. S to s as the same letter.

Parameters:

Name	Type	Description	Default
`text`	`list`	A list of strings, where each string is a line of text.	required

Examples:

>>> text = ['Stjerneklare Septembernat Sees Sirius', 'Sydhimlens smukkeste Stjerne']
>>> extract_alliteration(text)
[{'line': 0, 'symbol': 's', 'count': 4, 'words': ['Stjerneklare', 'Septembernat', 'Sees', 'Sirius']}, {'line': 1, 'symbol': 's', 'count': 3, 'words': ['Sydhimlens', 'smukkeste', 'Stjerne']}]

Source code in src/poetry_analysis/alliteration.py

def extract_alliteration(text: list[str]) -> list[dict]:
    """Extract words that start with the same letter from a text.

    NB! This function is case-insensitive and compares e.g. S to s as the same letter.

    Words whose first character is not alphabetic, as well as empty tokens,
    are ignored.

    Args:
        text (list): A list of strings, where each string is a line of text.
            A line may also be given as an already tokenised list of words.

    Returns:
        list: One dict per (line, letter) pair where at least two words on
            the line share the initial letter. Each dict has the keys
            ``line`` (index of the line in ``text``), ``symbol`` (the
            lowercased initial letter), ``count`` (number of matching words)
            and ``words`` (the matching words in their original form and
            order).

    Examples:
        >>> text = ['Stjerneklare Septembernat Sees Sirius', 'Sydhimlens smukkeste Stjerne']
        >>> extract_alliteration(text)
        [{'line': 0, 'symbol': 's', 'count': 4, 'words': ['Stjerneklare', 'Septembernat', 'Sees', 'Sirius']}, {'line': 1, 'symbol': 's', 'count': 3, 'words': ['Sydhimlens', 'smukkeste', 'Stjerne']}]
    """

    alliterations = []

    for i, line in enumerate(text):
        words = line.split() if isinstance(line, str) else line

        # Group the words of this line by their lowercased initial letter.
        seen = {}
        for word in words:
            if not word:
                # Pre-tokenised input may contain empty tokens.
                continue
            initial_letter = word[0].lower()
            if not initial_letter.isalpha():
                continue
            seen.setdefault(initial_letter, []).append(word)

        # Emit results only after the whole line has been scanned, so that a
        # trailing non-alphabetic token cannot suppress the line's results.
        for symbol, matching_words in seen.items():
            if len(matching_words) > 1:
                alliterations.append(
                    {
                        "line": i,
                        "symbol": symbol,
                        "count": len(matching_words),
                        "words": matching_words,
                    }
                )

    return alliterations

`find_line_alliterations(text, allowed_intervening_words=None)` ¶

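Find groups of words on a single line that alliterate on the same initial consonant, allowing the listed intervening words (by default "og", "i" and "er") to occur between them. Returns a list of word groups, or `None` if no alliteration is found.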

Source code in src/poetry_analysis/alliteration.py

def find_line_alliterations(
    text: str | list, allowed_intervening_words: list | None = None
) -> list[list] | None:
    """Find consonant alliterations on a line.

    Words are indexed by their lowercased initial letter. For every
    non-vowel initial that starts at least two words, the word positions are
    grouped with ``group_alliterating_indices``, which lets a group continue
    across words listed in ``allowed_intervening_words``.

    Args:
        text (str | list): The line to analyse. A string is tokenised with
            ``utils.normalize`` (presumably returning a list of word tokens
            -- confirm against that helper); a list is used as the word
            tokens directly.
        allowed_intervening_words (list | None): Words that may occur between
            two alliterating words without breaking the group. Defaults to
            ``["og", "i", "er"]``.

    Returns:
        list | None: A list of alliterating word groups, each a list of the
            original word tokens, or ``None`` when no group was found.

    Raises:
        TypeError: If ``text`` is neither a string nor a list.
    """
    if allowed_intervening_words is None:
        allowed_intervening_words = ["og", "i", "er"]

    if isinstance(text, str):
        words = utils.normalize(text)
    elif isinstance(text, list):
        words = text
    else:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise TypeError(f"text must be a str or a list, not {type(text).__name__}")

    # Maps {initial_letter: [indices_of_words_starting_with_this_letter]}
    seen = {}
    for j, word_token in enumerate(words):
        # Skip empty tokens from the tokenizer and tokens not starting with a letter.
        if not word_token or not word_token[0].isalpha():
            continue
        seen.setdefault(word_token[0].lower(), []).append(j)

    alliteration_annotations = []
    for symbol, positions in seen.items():
        if is_vowel(symbol):  # Only extract consonant alliterations
            continue
        if len(positions) < 2:  # Need at least two words starting with this letter
            continue
        # Each returned group holds at least two indices; map them back to the words.
        for group_indices in group_alliterating_indices(positions, words, allowed_intervening_words):
            alliteration_annotations.append([words[p] for p in group_indices])

    return alliteration_annotations if alliteration_annotations else None

`group_alliterating_indices(indices, all_words_in_line, stop_words)` ¶

Groups indices of words that alliterate, allowing specified stop_words in between.

Source code in src/poetry_analysis/alliteration.py

def group_alliterating_indices(indices: list, all_words_in_line: list, stop_words: list):
    """Group indices of alliterating words, allowing stop words in between.

    Consecutive entries of ``indices`` are placed in the same group when every
    word located strictly between them in ``all_words_in_line`` is a non-empty
    token whose lowercased form is in ``stop_words``. Any other intervening
    word closes the current group and starts a new one.

    Args:
        indices (list): Positions in ``all_words_in_line`` of the words that
            share an initial letter.
        all_words_in_line (list): All word tokens of the line.
        stop_words (list): Words that may occur between alliterating words
            without breaking the group.

    Returns:
        list: A list of index groups (lists); only groups with at least two
            indices are included.
    """
    if not indices:
        return []

    line_length = len(all_words_in_line)

    def may_intervene(position):
        # An intervening word is tolerated only if it exists, is non-empty
        # and is one of the stop words.
        if position >= line_length:
            return False
        token = all_words_in_line[position]
        return bool(token) and token.lower() in stop_words

    groups = []
    run = [indices[0]]

    for candidate in indices[1:]:
        gap = range(run[-1] + 1, candidate)
        # An empty gap (adjacent words) trivially allows the run to continue.
        if all(may_intervene(position) for position in gap):
            run.append(candidate)
            continue

        # The run is broken: keep it only if it holds a real alliteration.
        if len(run) > 1:
            groups.append(run)
        run = [candidate]

    # Flush the run that was still open when the indices ran out.
    if len(run) > 1:
        groups.append(run)

    return groups

Alliteration¶

poetry_analysis.alliteration ¶

count_alliteration(text) ¶

extract_alliteration(text) ¶

find_line_alliterations(text, allowed_intervening_words=None) ¶

group_alliterating_indices(indices, all_words_in_line, stop_words) ¶

`poetry_analysis.alliteration` ¶

`count_alliteration(text)` ¶

`extract_alliteration(text)` ¶

`find_line_alliterations(text, allowed_intervening_words=None)` ¶

`group_alliterating_indices(indices, all_words_in_line, stop_words)` ¶