Skip to content

Alliteration

poetry_analysis.alliteration

The definition of alliteration that we use here is the repetition of word-initial consonants or consonant clusters.

count_alliteration(text)

Count the number of times the same word-initial letter occurs in a text.

Examples:

>>> text = "Sirius som seer"
>>> count_alliteration(text)
{'s': 3}
Source code in src/poetry_analysis/alliteration.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
def count_alliteration(text: str) -> dict:
    """Count how often each word-initial letter is repeated in a text.

    The text is split on whitespace and the comparison is case-insensitive.
    Words whose first character is not alphabetic (punctuation, digits, ...)
    are ignored, mirroring ``extract_alliteration``.

    Args:
        text (str): The text to analyse.

    Returns:
        dict: Maps each lowercased initial letter that starts more than one
            word to the number of words starting with it. Letters that
            occur only once are left out.

    Examples:
        >>> text = "Sirius som seer"
        >>> count_alliteration(text)
        {'s': 3}
    """
    initial_counts = {}

    # str.split() never yields empty strings, so word[0] is always safe here.
    for word in text.split():
        initial_letter = word[0].lower()
        if not initial_letter.isalpha():
            # A dash or a digit is not a letter and cannot alliterate.
            continue
        initial_counts[initial_letter] = initial_counts.get(initial_letter, 0) + 1

    return {letter: count for letter, count in initial_counts.items() if count > 1}

extract_alliteration(text)

Extract words that start with the same letter from a text.

NB! This function is case-insensitive and compares e.g. S to s as the same letter.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| text | list | A list of strings, where each string is a line of text. | required |

Examples:

>>> text = ['Stjerneklare Septembernat Sees Sirius', 'Sydhimlens smukkeste Stjerne']
>>> extract_alliteration(text)
[{'line': 0, 'symbol': 's', 'count': 4, 'words': ['Stjerneklare', 'Septembernat', 'Sees', 'Sirius']}, {'line': 1, 'symbol': 's', 'count': 3, 'words': ['Sydhimlens', 'smukkeste', 'Stjerne']}]
Source code in src/poetry_analysis/alliteration.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def extract_alliteration(text: list[str]) -> list[dict]:
    """Extract words that start with the same letter from each line of a text.

    NB! This function is case-insensitive and compares e.g. S to s as the same letter.
    Tokens that are empty or whose first character is not alphabetic are ignored.

    Args:
        text (list): A list of strings, where each string is a line of text.
            A line that is not a string is iterated as an already tokenized
            sequence of words.

    Returns:
        list[dict]: One dict per line and repeated initial letter, with the keys
            ``line`` (index of the line in ``text``), ``symbol`` (the lowercased
            initial letter), ``count`` (number of matching words) and ``words``
            (the matching words in their original form and order).

    Examples:
        >>> text = ['Stjerneklare Septembernat Sees Sirius', 'Sydhimlens smukkeste Stjerne']
        >>> extract_alliteration(text)
        [{'line': 0, 'symbol': 's', 'count': 4, 'words': ['Stjerneklare', 'Septembernat', 'Sees', 'Sirius']}, {'line': 1, 'symbol': 's', 'count': 3, 'words': ['Sydhimlens', 'smukkeste', 'Stjerne']}]
    """
    alliterations = []

    for line_number, line in enumerate(text):
        words = line.split() if isinstance(line, str) else line
        words_by_initial = {}
        for word in words:
            if not word:
                # Pre-tokenized lines may contain empty tokens.
                continue
            initial_letter = word[0].lower()
            if not initial_letter.isalpha():
                continue
            words_by_initial.setdefault(initial_letter, []).append(word)

        # Report only after the whole line has been scanned, so that a trailing
        # token that gets skipped (e.g. punctuation) cannot suppress the result.
        alliterations.extend(
            {
                "line": line_number,
                "symbol": symbol,
                "count": len(matching_words),
                "words": matching_words,
            }
            for symbol, matching_words in words_by_initial.items()
            if len(matching_words) > 1
        )

    return alliterations

find_line_alliterations(text, allowed_intervening_words=None)

Find alliterations on a line.

Source code in src/poetry_analysis/alliteration.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
def find_line_alliterations(text: str | list, allowed_intervening_words: list | None = None):
    """Find alliterations on a line."""
    if allowed_intervening_words is None:
        allowed_intervening_words = ["og", "i", "er"]

    if isinstance(text, list):
        words = text
    elif isinstance(text, str):
        words = utils.normalize(text)

    # Stores {initial_letter: [indices_of_words_starting_with_this_letter]}
    seen = {}
    for j, word_token in enumerate(words):
        if not word_token:  # Handle potential empty strings from tokenizer
            continue
        # Ensure word_token is not empty before accessing word_token[0]
        if not word_token[0].isalpha():
            continue
        initial_letter = word_token[0].lower()

        if initial_letter in seen:
            seen[initial_letter].append(j)
        else:
            seen[initial_letter] = [j]

    alliteration_annotations = []
    # The following logic identifies all groups of words in the line that start with the same consonant,
    # treating them as alliterations if the initial letter appears more than once and grouping them
    # while allowing certain intervening words.
    if any(len(idx_list) > 1 for idx_list in seen.values()):
        for symbol, positions in seen.items():
            if is_vowel(symbol):  # Only extract consonant alliterations
                continue
            if len(positions) > 1:  # Need at least two words starting with this letter
                # Group indices considering allowed intervening words
                alliterating_groups = group_alliterating_indices(positions, words, allowed_intervening_words)

                for group_indices in alliterating_groups:
                    # group_alliterating_indices already ensures len(group_indices) >= 2
                    alliteration_annotations.append([words[p] for p in group_indices])

    return alliteration_annotations if alliteration_annotations else None

group_alliterating_indices(indices, all_words_in_line, stop_words)

Groups indices of words that alliterate, allowing specified stop_words in between.

Source code in src/poetry_analysis/alliteration.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
def group_alliterating_indices(indices: list, all_words_in_line: list, stop_words: list):
    """
    Split the positions of alliterating words into runs that belong together.

    Two consecutive positions stay in the same run when every word lying
    between them is a non-empty token whose lowercased form is found in
    ``stop_words``. Runs of fewer than two positions are dropped.

    Args:
        indices (list): Positions in ``all_words_in_line`` of the candidate words.
        all_words_in_line (list): All word tokens of the line.
        stop_words (list): Words that are allowed to sit between alliterating words.

    Returns:
        list: A list of runs, each a list of at least two positions.
    """
    if not indices:
        return []

    def _may_intervene(position):
        # A gap word is tolerated only if it exists, is non-empty and is a stop word.
        if position >= len(all_words_in_line):
            return False
        token = all_words_in_line[position]
        return bool(token) and token.lower() in stop_words

    runs = []
    run = [indices[0]]

    for candidate in indices[1:]:
        # Empty range (adjacent words) means there is nothing to object to.
        gap = range(run[-1] + 1, candidate)
        if all(_may_intervene(position) for position in gap):
            run.append(candidate)
            continue

        # The run is broken: keep it if it is long enough, then start afresh.
        if len(run) > 1:
            runs.append(run)
        run = [candidate]

    # The final run is never closed inside the loop.
    if len(run) > 1:
        runs.append(run)

    return runs