File size: 13,966 Bytes
45c71f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
import random
import re
from typing import List, Tuple, Optional, Dict
import nltk
from nltk.corpus import wordnet as wn

# Make sure the WordNet corpus is available before it is queried: the lookup
# below raises LookupError when the corpus is not installed locally, in which
# case it is downloaded quietly.
try:
    nltk.data.find('corpora/wordnet')
except LookupError:
    nltk.download('wordnet', quiet=True)

# Words rejected in user input (check_input_safety) and masked in model
# output (filter_output).
BANNED_WORDS = {
    "fuck", "shit", "damn", "sex", "violence", "kill", "death", "weapon",
    "drug", "hate", "stupid", "idiot", "dumb", "ugly", "fat", "racist"
}

# Additional topic keywords that make user input unsuitable.
UNSAFE_KEYWORDS = {
    "violence", "weapon", "gun", "knife", "blood", "murder", "kill", "death",
    "sex", "sexual", "porn", "nude", "naked", "drug", "cocaine", "weed",
    "hate", "racist", "nazi", "terror", "bomb", "suicide"
}

# Optional endings accepted after a flagged word so that simple inflections
# ("kills", "killing", "hated", "murderer") are still caught even though
# matching is done on whole words.
_INFLECTION_SUFFIXES = r"(?:s|es|d|ed|ing|er|ers)?"


def _find_flagged_word(text_lower: str, words) -> Optional[str]:
    """Return the first entry of *words* found in *text_lower* as a whole word.

    A match must not be preceded or followed by another ASCII letter, so
    "kill" is not found inside "skill" and "fat" is not found inside
    "father".  Words are tried in sorted order so the reported word does not
    depend on set iteration order.
    """
    for word in sorted(words):
        pattern = rf"(?<![a-z]){re.escape(word)}{_INFLECTION_SUFFIXES}(?![a-z])"
        if re.search(pattern, text_lower):
            return word
    return None


def check_input_safety(text: str) -> Tuple[bool, str]:
    """Check if input text is safe for children.

    The text is lower-cased and searched for entries of BANNED_WORDS and then
    UNSAFE_KEYWORDS.  Matching is done on whole words (plus a few common
    endings such as "-s", "-ed", "-ing") rather than raw substrings, so
    harmless words that merely contain a flagged word are accepted.

    Args:
        text: Raw user input. Empty or whitespace-only input is considered safe.

    Returns:
        Tuple of (is_safe: bool, reason: str). ``reason`` is an empty string
        when the input is safe.
    """
    if not text or not text.strip():
        return True, ""

    text_lower = text.lower()

    # Check for banned words
    banned = _find_flagged_word(text_lower, BANNED_WORDS)
    if banned is not None:
        return False, f"Input contains inappropriate word: '{banned}'"

    # Check for unsafe keywords
    unsafe = _find_flagged_word(text_lower, UNSAFE_KEYWORDS)
    if unsafe is not None:
        return False, f"Input contains unsafe keyword: '{unsafe}'"

    # Check for excessive caps (yelling): more than 70% of all characters
    # upper-case in a text longer than 10 characters.
    if len(text) > 10 and sum(1 for c in text if c.isupper()) / len(text) > 0.7:
        return False, "Please don't use all caps"

    return True, ""


def filter_output(text: str) -> str:
    """Filter and clean poem output from model.

    Removes lead-in phrases, markdown decoration, leaked guideline sections
    and ``<...>`` placeholders, drops lines that look like instructions or
    metadata, and finally masks every token whose bare word is in
    BANNED_WORDS with ``***``.

    Args:
        text: Raw text produced by the model.

    Returns:
        The cleaned poem as a single string. Tokens are re-joined with single
        spaces, so line breaks of the input are not preserved.
    """
    # Remove common unwanted patterns
    text = re.sub(r'(?i)(here is|here\'s).*?(poem|verse).*?:', '', text)
    text = re.sub(r'(?i)^(poem|title|verse).*?:', '', text, flags=re.MULTILINE)
    text = re.sub(r'\*\*.*?\*\*', '', text)  # Remove markdown bold
    text = re.sub(r'#{1,6}\s+.*', '', text)  # Remove markdown headers
    text = re.sub(r'---+', '', text)  # Remove separators
    text = re.sub(r'##\s+Guidelines.*', '', text, flags=re.DOTALL)  # Remove guidelines if leaked
    text = re.sub(r'<.*?>', '', text)  # Remove any remaining placeholders

    # Split into lines and drop the empty ones
    lines = [line.strip() for line in text.split('\n') if line.strip()]

    # Substrings that mark a line as instructions or metadata rather than verse
    skip_markers = ['guideline', 'parameter', 'instruction', 'format', 'length:', 'age:', 'theme:', 'interest:', 'description:', '- **', 'output only']

    poem_lines = []
    for line in lines:
        lower_line = line.lower()
        # Skip instruction-like lines
        if any(skip in lower_line for skip in skip_markers):
            continue
        # Skip numbered instruction lines
        if re.match(r'^\d+\.\s+\*\*', line):
            continue
        poem_lines.append(line)

    # Rejoin the clean poem
    result = '\n'.join(poem_lines)

    # Mask banned words. Any surrounding punctuation (quotes, semicolons,
    # brackets, ...) is ignored for the comparison so that tokens such as
    # '"damn"' or 'damn;' are masked as well.
    filtered = []
    for token in result.split():
        bare = re.sub(r'^[\W_]+|[\W_]+$', '', token.lower())
        filtered.append("***" if bare in BANNED_WORDS else token)

    return " ".join(filtered)


def load_prompt_template(filepath: str) -> str:
    """Return the full text of the prompt template stored at *filepath*.

    The file is read as UTF-8; any error raised while opening or reading it
    propagates to the caller.
    """
    with open(filepath, mode='r', encoding='utf-8') as template_file:
        contents = template_file.read()
    return contents


def fill_prompt_template(template: str, **kwargs) -> str:
    """Substitute ``<name>`` placeholders in *template* with keyword values.

    Each keyword argument ``name=value`` replaces every occurrence of
    ``<name>`` with ``str(value)``.  Replacements are applied one after the
    other in keyword order; placeholders without a matching keyword are left
    untouched.
    """
    filled = template
    for name in kwargs:
        filled = filled.replace("<" + name + ">", str(kwargs[name]))
    return filled


def select_words_to_blank(poem: str, difficulty: str, age: int, model) -> List[str]:
    """Use AI model to intelligently select words to blank based on difficulty and age.

    The model is asked for 3 (Easy), 6 (Medium) or 9 (any other difficulty)
    words, one per line.  Each reply line is stripped of list markers,
    numbering and punctuation; only candidates that actually occur in the
    poem are kept, so preamble lines such as "Here are the words:" do not
    use up a slot.

    Args:
        poem: The complete poem text
        difficulty: Easy, Medium, or Hard
        age: Reader's age for appropriate word selection
        model: Object exposing ``generate(prompt, max_tokens=..., temperature=...)``
            that returns the reply text

    Returns:
        List of words to blank out (lowercase, no duplicates, at most the
        requested number). An empty list is returned when the model call or
        the parsing fails.
    """
    if difficulty == "Easy":
        n_words = 3
        instruction = "3 simple, common words that are easy to guess from context"
    elif difficulty == "Medium":
        n_words = 6
        instruction = "6 moderately challenging words with some ambiguity"
    else:  # Hard
        n_words = 9
        instruction = "9 key thematic words and challenging vocabulary with high ambiguity"

    prompt = f"""Select exactly {n_words} words from this poem to remove for a fill-in-the-blank exercise.

Age: {age} years old

Difficulty: {difficulty}



Poem:

{poem}



Instructions:

- Choose {instruction}

- For {difficulty} difficulty, select words appropriate for age {age}

- Consider context clues and ambiguity level

- Return ONLY the {n_words} words, one per line, nothing else



Selected words:"""

    # Lower-cased words of the poem with surrounding punctuation removed;
    # used to reject reply lines that are not poem words.
    poem_vocab = {
        re.sub(r"^[\W_]+|[\W_]+$", "", token).lower() for token in poem.split()
    }
    poem_vocab.discard("")

    try:
        response = model.generate(prompt, max_tokens=512, temperature=0.3)
        print(f"\n{'='*60}\nAI WORD SELECTION\n{'='*60}")
        print(f"Difficulty: {difficulty} | Age: {age}")
        print(f"AI Response:\n{response}")

        # Parse the response to extract words
        selected = []
        for line in response.strip().split('\n'):
            # Drop bullets, numbering and quoting around the word
            word = line.strip().strip('.,!?-*•"\'1234567890. ').lower()
            if len(word) > 1 and word not in selected and word in poem_vocab:
                selected.append(word)
            if len(selected) >= n_words:
                break

        print(f"Parsed words: {selected}")
        print(f"{'='*60}\n")
        return selected[:n_words]
    except Exception as e:
        # Best effort: callers fall back to their own selection on an empty list
        print(f"Error selecting words with model: {e}")
        return []


def create_fill_in_blank(poem: str, difficulty: str, selected_words: Optional[List[str]] = None) -> Tuple[str, List[str], List[int], List[str]]:
    """Create fill-in-blank exercise from poem with exact positions.

    The poem is split on whitespace.  When ``selected_words`` is given, the
    first occurrence of each of those words (compared case-insensitively and
    without surrounding punctuation) is blanked.  If that yields fewer blanks
    than the difficulty requires, the remainder is drawn at random from words
    longer than 4 characters, or longer than 3 when there are not enough.

    Args:
        poem: The complete poem text
        difficulty: Easy (3 blanks), Medium (6 blanks), anything else (9 blanks)
        selected_words: Optional list of specific words to blank (from AI model)

    Returns:
        Tuple of (blanked_poem, correct_answers_in_order, positions, all_poem_words).
        ``positions`` are indices into ``all_poem_words`` in ascending order and
        ``correct_answers_in_order`` are the matching words without surrounding
        punctuation.  The blanked poem is joined with single spaces.  When
        nothing can be blanked the original poem and empty lists are returned.
    """
    def bare(token: str) -> str:
        # Token without leading/trailing punctuation (quotes, commas, ...)
        return re.sub(r"^[\W_]+|[\W_]+$", "", token)

    words = poem.split()

    # Determine number of blanks based on difficulty
    n_blanks = {"Easy": 3, "Medium": 6}.get(difficulty, 9)

    # If specific words provided by AI model, use those
    blank_indices = []
    if selected_words:
        wanted = {w.lower() for w in selected_words}
        for idx, word in enumerate(words):
            key = bare(word).lower()
            if key and key in wanted:
                blank_indices.append(idx)
                # One blank per selected word, so a repeated word cannot
                # crowd out the other selections.
                wanted.discard(key)
                if len(blank_indices) >= n_blanks:
                    break

    # Fallback if AI didn't provide enough words
    missing = n_blanks - len(blank_indices)
    if missing > 0:
        taken = set(blank_indices)

        def pool(min_len: int) -> List[int]:
            return [
                i for i, w in enumerate(words)
                if len(bare(w)) > min_len and i not in taken
            ]

        candidates = pool(4)
        if len(candidates) < missing:
            candidates = pool(3)
        blank_indices.extend(random.sample(candidates, min(missing, len(candidates))))

    if not blank_indices:
        return poem, [], [], words

    # Sort positions for consistent ordering
    positions = sorted(blank_indices[:n_blanks])

    # Store correct answers in order of appearance
    correct_answers = [bare(words[idx]) for idx in positions]

    blanked_words = list(words)
    for idx in positions:
        # Replace with uniform blank (15 underscores)
        blanked_words[idx] = "_______________"

    blanked_poem = " ".join(blanked_words)

    return blanked_poem, correct_answers, positions, words


def get_word_definition(word: str) -> Optional[str]:
    """Look up *word* in WordNet and return its first definition.

    Returns:
        The definition of the first synset, lower-cased, or ``None`` when
        WordNet has no synset for the word.
    """
    matches = wn.synsets(word)
    if not matches:
        return None
    first_sense = matches[0]
    return first_sense.definition().lower()


def rank_definitions(definitions: List[str], correct_definition: str, use_ai: bool = False, model = None) -> List[Tuple[int, float]]:
    """Rank player definitions by similarity to correct definition.

    Every definition gets a Jaccard similarity over its lower-cased word
    tokens (punctuation is ignored).  When ``use_ai`` is true and a model is
    given, the model is additionally asked for a 0-10 rating; the first
    number in its reply is scaled to 0-1, capped at 1, and blended as
    70% AI score + 30% Jaccard score.  If the model call fails the Jaccard
    score alone is used.

    Args:
        definitions: One definition per player; empty or blank entries score 0.
        correct_definition: Reference definition to compare against.
        use_ai: Whether to ask the model for an additional rating.
        model: Object exposing ``generate(prompt, max_tokens=..., temperature=...)``.

    Returns:
        List of tuples (player_index, score) sorted by score descending;
        ties keep the original player order.
    """
    def tokenize(text: str) -> set:
        # Word tokens only, so "dog." and "dog" count as the same word
        return set(re.findall(r"\w+", text.lower()))

    def jaccard(a_set: set, b_set: set) -> float:
        if not a_set or not b_set:
            return 0.0
        return len(a_set & b_set) / len(a_set | b_set)

    target_tokens = tokenize(correct_definition)

    scores = []
    for idx, definition in enumerate(definitions):
        if not definition or not definition.strip():
            scores.append((idx, 0.0))
            continue

        # Calculate Jaccard similarity
        jaccard_score = jaccard(tokenize(definition), target_tokens)
        final_score = jaccard_score

        # If AI scoring is enabled and model is provided
        if use_ai and model:
            try:
                prompt = f"""Rate how well this definition matches the correct definition on a scale of 0-10.



Correct definition: {correct_definition}

Player definition: {definition}



Respond with only a number between 0 and 10."""

                ai_response = model.generate(prompt, max_tokens=128, temperature=0.3)
                # Extract the first number from the response; cap the scaled
                # value at 1.0 so an out-of-range reply cannot dominate.
                numbers = re.findall(r'\d+\.?\d*', ai_response)
                ai_score = min(float(numbers[0]) / 10.0, 1.0) if numbers else 0.0
                # Combine AI score (70%) and Jaccard score (30%)
                final_score = 0.7 * ai_score + 0.3 * jaccard_score
            except Exception:
                # Model unavailable or reply unusable: fall back to Jaccard only
                final_score = jaccard_score

        scores.append((idx, final_score))

    scores.sort(key=lambda x: x[1], reverse=True)
    return scores


def load_vocabulary(filepath: str) -> Dict[str, List[str]]:
    """Load vocabulary dictionary from markdown file.

    The file is scanned line by line.  A line starting with ``## Easy``,
    ``## Medium`` or ``## Hard`` switches the current level; every following
    line that starts with ``-`` adds one word to that level.  Bullets before
    the first level heading and empty bullets are ignored.

    Args:
        filepath: Path of the UTF-8 markdown file.

    Returns:
        Dict with the keys "Easy", "Medium" and "Hard", each mapping to the
        list of words found under that heading (in file order).

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    vocab = {"Easy": [], "Medium": [], "Hard": []}
    current_level = None

    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            heading = next(
                (level for level in vocab if line.startswith(f"## {level}")),
                None,
            )
            if heading is not None:
                current_level = heading
            elif current_level and line.startswith("-"):
                word = line.lstrip("- ").strip()
                # Skip bare "-" bullets instead of storing an empty word
                if word:
                    vocab[current_level].append(word)

    return vocab


def load_themes(filepath: str) -> List[str]:
    """Read the list of themes from a markdown bullet file.

    Every line that starts with "-" once surrounding whitespace is trimmed
    contributes one theme; bullets without text are ignored.  If the file
    does not exist a built-in default list is returned instead.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            bullets = [raw.strip() for raw in handle if raw.strip().startswith("-")]
    except FileNotFoundError:
        # No themes file available: fall back to the built-in set
        return ["Nature", "Animals", "Friendship", "Adventure", "Family", "Seasons", "Ocean", "Space", "Dreams", "Magic"]

    cleaned = (bullet.lstrip("- ").strip() for bullet in bullets)
    return [theme for theme in cleaned if theme]


def load_interests(filepath: str) -> List[str]:
    """Read the list of interests/hobbies from a markdown bullet file.

    Every line that starts with "-" once surrounding whitespace is trimmed
    contributes one interest; bullets without text are ignored.  If the file
    does not exist a built-in default list is returned instead.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            bullets = [raw.strip() for raw in handle if raw.strip().startswith("-")]
    except FileNotFoundError:
        # No interests file available: fall back to the built-in set
        return [
            "Sports", "Music", "Art", "Reading", "Dancing", "Video Games",
            "Dinosaurs", "Superheroes", "Princesses", "Science", "Cooking",
            "Animals", "Cars", "Robots", "Movies", "Swimming"
        ]

    cleaned = (bullet.lstrip("- ").strip() for bullet in bullets)
    return [interest for interest in cleaned if interest]


def save_leaderboard_score(filepath: str, difficulty: str, player_name: str, score: float):
    """Record a player's score in the JSON leaderboard file.

    The leaderboard is read from *filepath* (or started with empty "Easy",
    "Medium" and "Hard" lists when the file does not exist).  If *difficulty*
    is a key of the leaderboard, the new entry is added and that list is kept
    sorted by score, highest first, and limited to 100 entries.  The
    leaderboard is then written back to *filepath*.
    """
    import json
    import os

    # Start from the stored leaderboard when there is one
    if not os.path.exists(filepath):
        board = {"Easy": [], "Medium": [], "Hard": []}
    else:
        with open(filepath, 'r', encoding='utf-8') as source:
            board = json.load(source)

    # Unknown difficulties are not recorded
    if difficulty in board:
        entries = board[difficulty]
        entries.append({"name": player_name, "score": score})
        ranked = sorted(entries, key=lambda entry: entry["score"], reverse=True)
        board[difficulty] = ranked[:100]

    # Persist the (possibly updated) leaderboard
    with open(filepath, 'w', encoding='utf-8') as target:
        json.dump(board, target, indent=2)


def get_leaderboard(filepath: str, difficulty: str, top_n: int = 10) -> List[Dict]:
    """Get top N players from leaderboard for a difficulty level.

    Reading is best effort: a missing, unreadable or malformed leaderboard
    file yields an empty list instead of an exception.

    Args:
        filepath: Path of the JSON leaderboard file.
        difficulty: Key of the leaderboard section to read.
        top_n: Maximum number of entries to return.

    Returns:
        The first ``top_n`` stored entries for ``difficulty``, or an empty
        list when the file or the section is not available.
    """
    import json

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            leaderboard = json.load(f)

        if difficulty in leaderboard:
            return leaderboard[difficulty][:top_n]
    except (OSError, ValueError, TypeError):
        # OSError: file missing/unreadable; ValueError: invalid JSON or bad
        # encoding; TypeError: JSON content does not have the expected shape.
        return []

    return []