"""Utility functions for a children's poetry fill-in-the-blank game.

Covers input safety filtering, model-output cleanup, prompt templating,
blank selection/creation, definition ranking, and simple markdown/JSON
data loading (vocabulary, themes, interests, leaderboard).
"""

import json
import os
import random
import re
from typing import Dict, List, Optional, Tuple

import nltk
from nltk.corpus import wordnet as wn

# Ensure the WordNet corpus is available; download quietly on first run.
try:
    nltk.data.find('corpora/wordnet')
except LookupError:
    nltk.download('wordnet', quiet=True)

# Words that must never appear verbatim in input or output shown to children.
BANNED_WORDS = {
    "fuck", "shit", "damn", "sex", "violence", "kill", "death", "weapon",
    "drug", "hate", "stupid", "idiot", "dumb", "ugly", "fat", "racist",
}

# Broader topical keywords that make an input prompt unsafe for this audience.
UNSAFE_KEYWORDS = {
    "violence", "weapon", "gun", "knife", "blood", "murder", "kill", "death",
    "sex", "sexual", "porn", "nude", "naked", "drug", "cocaine", "weed",
    "hate", "racist", "nazi", "terror", "bomb", "suicide",
}


def _matches_whole_word(text_lower: str, word: str) -> bool:
    """Return True if *word* appears in *text_lower* as a whole word.

    Word-boundary matching avoids false positives on innocent words that
    merely contain a banned substring (e.g. "skill" contains "kill",
    "assessment" contains "sex").
    """
    return re.search(rf"\b{re.escape(word)}\b", text_lower) is not None


def check_input_safety(text: str) -> Tuple[bool, str]:
    """Check if input text is safe for children.

    Returns:
        Tuple of (is_safe: bool, reason: str). ``reason`` is empty when safe.
    """
    if not text or not text.strip():
        return True, ""

    text_lower = text.lower()

    # Check for banned words (whole-word match, not substring).
    for word in BANNED_WORDS:
        if _matches_whole_word(text_lower, word):
            return False, f"Input contains inappropriate word: '{word}'"

    # Check for unsafe keywords.
    for keyword in UNSAFE_KEYWORDS:
        if _matches_whole_word(text_lower, keyword):
            return False, f"Input contains unsafe keyword: '{keyword}'"

    # Check for excessive caps (yelling): >70% uppercase in longer inputs.
    if len(text) > 10 and sum(1 for c in text if c.isupper()) / len(text) > 0.7:
        return False, "Please don't use all caps"

    return True, ""


def _mask_banned_words(line: str) -> str:
    """Replace any banned word token in *line* with '***' (punctuation-aware)."""
    masked = []
    for token in line.split():
        if token.lower().strip('.,!?') in BANNED_WORDS:
            masked.append("***")
        else:
            masked.append(token)
    return " ".join(masked)


def filter_output(text: str) -> str:
    """Filter and clean poem output from the model.

    Strips prompt-leak artifacts (headers, markdown, instruction lines),
    masks banned words, and preserves the poem's line structure.
    """
    # Remove common unwanted patterns the model may echo back.
    text = re.sub(r'(?i)(here is|here\'s).*?(poem|verse).*?:', '', text)
    text = re.sub(r'(?i)^(poem|title|verse).*?:', '', text, flags=re.MULTILINE)
    text = re.sub(r'\*\*.*?\*\*', '', text)            # markdown bold
    text = re.sub(r'#{1,6}\s+.*', '', text)            # markdown headers
    text = re.sub(r'---+', '', text)                   # separators
    text = re.sub(r'##\s+Guidelines.*', '', text, flags=re.DOTALL)  # leaked guidelines
    text = re.sub(r'<.*?>', '', text)                  # remaining placeholders

    # Keep only non-empty lines.
    lines = [line.strip() for line in text.split('\n') if line.strip()]

    # Drop lines that look like instructions or metadata rather than poetry.
    skip_markers = (
        'guideline', 'parameter', 'instruction', 'format', 'length:', 'age:',
        'theme:', 'interest:', 'description:', '- **', 'output only',
    )
    poem_lines = []
    for line in lines:
        lower_line = line.lower()
        if any(marker in lower_line for marker in skip_markers):
            continue
        # Skip numbered instruction lines like "1. **Do this**".
        if re.match(r'^\d+\.\s+\*\*', line):
            continue
        poem_lines.append(line)

    # Mask banned words per line so the poem's line breaks are preserved
    # (the previous implementation flattened the poem onto a single line).
    return "\n".join(_mask_banned_words(line) for line in poem_lines)


def load_prompt_template(filepath: str) -> str:
    """Load a prompt template from a markdown file."""
    with open(filepath, 'r', encoding='utf-8') as f:
        return f.read()


def fill_prompt_template(template: str, **kwargs) -> str:
    """Replace placeholders like ``<key>`` with the corresponding keyword values."""
    result = template
    for key, value in kwargs.items():
        placeholder = f"<{key}>"
        result = result.replace(placeholder, str(value))
    return result


def select_words_to_blank(poem: str, difficulty: str, age: int, model) -> List[str]:
    """Use the AI model to select words to blank based on difficulty and age.

    Args:
        poem: The complete poem text.
        difficulty: "Easy", "Medium", or "Hard".
        age: Reader's age for appropriate word selection.
        model: The PoetryModel instance (must expose ``generate``).

    Returns:
        List of words to blank out (lowercase); empty list on failure.
    """
    if difficulty == "Easy":
        n_words = 3
        instruction = "3 simple, common words that are easy to guess from context"
    elif difficulty == "Medium":
        n_words = 6
        instruction = "6 moderately challenging words with some ambiguity"
    else:  # Hard
        n_words = 9
        instruction = "9 key thematic words and challenging vocabulary with high ambiguity"

    prompt = f"""Select exactly {n_words} words from this poem to remove for a fill-in-the-blank exercise.

Age: {age} years old
Difficulty: {difficulty}

Poem:
{poem}

Instructions:
- Choose {instruction}
- For {difficulty} difficulty, select words appropriate for age {age}
- Consider context clues and ambiguity level
- Return ONLY the {n_words} words, one per line, nothing else

Selected words:"""

    try:
        response = model.generate(prompt, max_tokens=512, temperature=0.3)
        print(f"\n{'='*60}\nAI WORD SELECTION\n{'='*60}")
        print(f"Difficulty: {difficulty} | Age: {age}")
        print(f"AI Response:\n{response}")

        # Parse the response: one word per line, ignoring bullets/numbering.
        selected = []
        for line in response.strip().split('\n'):
            word = line.strip().strip('.,!?-*•"\'1234567890. ').lower()
            if word and len(word) > 1 and word not in selected:
                selected.append(word)
            if len(selected) >= n_words:
                break

        print(f"Parsed words: {selected}")
        print(f"{'='*60}\n")
        return selected[:n_words] if selected else []
    except Exception as e:
        # Best effort: the caller falls back to random selection on failure.
        print(f"Error selecting words with model: {e}")
        return []


def create_fill_in_blank(
    poem: str,
    difficulty: str,
    selected_words: Optional[List[str]] = None,
) -> Tuple[str, List[str], List[int], List[str]]:
    """Create a fill-in-blank exercise from a poem with exact positions.

    Args:
        poem: The complete poem text.
        difficulty: Easy (3 blanks), Medium (6 blanks), Hard (9 blanks).
        selected_words: Optional specific words to blank (from the AI model).

    Returns:
        Tuple of (blanked_poem, correct_answers_in_order, positions, all_poem_words).
    """
    words = poem.split()

    # Number of blanks is fixed per difficulty level.
    if difficulty == "Easy":
        n_blanks = 3
    elif difficulty == "Medium":
        n_blanks = 6
    else:  # Hard
        n_blanks = 9

    # If specific words were provided by the AI model, locate them first.
    blank_indices: List[int] = []
    if selected_words:
        # Build the lookup set once instead of per word (was O(n*m)).
        wanted = {w.lower() for w in selected_words}
        for idx, word in enumerate(words):
            if word.strip('.,!?').lower() in wanted:
                blank_indices.append(idx)
                if len(blank_indices) >= n_blanks:
                    break

    # Fallback: pick random longer words if the AI didn't provide enough.
    if len(blank_indices) < n_blanks:
        candidates = [
            i for i, w in enumerate(words)
            if len(w.strip('.,!?')) > 4 and i not in blank_indices
        ]
        if len(candidates) < (n_blanks - len(blank_indices)):
            # Relax the length threshold if the poem has few long words.
            candidates = [
                i for i, w in enumerate(words)
                if len(w.strip('.,!?')) > 3 and i not in blank_indices
            ]
        additional = random.sample(
            candidates, min(n_blanks - len(blank_indices), len(candidates))
        )
        blank_indices.extend(additional)

    if not blank_indices:
        return poem, [], [], words

    # Sort positions so answers appear in reading order.
    blank_indices = sorted(blank_indices[:n_blanks])

    correct_answers = []
    positions = []
    blanked_words = words.copy()
    for idx in blank_indices:
        correct_answers.append(words[idx].strip('.,!?'))
        positions.append(idx)
        # Uniform blank (15 underscores) so blanks don't hint at word length.
        blanked_words[idx] = "_______________"

    return " ".join(blanked_words), correct_answers, positions, words


def get_word_definition(word: str) -> Optional[str]:
    """Get a word's first WordNet definition (lowercased), or None if unknown."""
    synsets = wn.synsets(word)
    if synsets:
        return synsets[0].definition().lower()
    return None


def rank_definitions(
    definitions: List[str],
    correct_definition: str,
    use_ai: bool = False,
    model=None,
) -> List[Tuple[int, float]]:
    """Rank player definitions by similarity to the correct definition.

    Returns:
        List of tuples (player_index, score) sorted by score descending.
    """

    def jaccard(a: str, b: str) -> float:
        """Word-level Jaccard similarity between two strings."""
        a_set = set(a.split())
        b_set = set(b.split())
        if not a_set or not b_set:
            return 0.0
        return len(a_set & b_set) / len(a_set | b_set)

    scores = []
    for idx, definition in enumerate(definitions):
        if not definition or not definition.strip():
            scores.append((idx, 0.0))
            continue

        jaccard_score = jaccard(definition.lower(), correct_definition.lower())

        if use_ai and model:
            try:
                prompt = f"""Rate how well this definition matches the correct definition on a scale of 0-10.

Correct definition: {correct_definition}
Player definition: {definition}

Respond with only a number between 0 and 10."""
                ai_response = model.generate(prompt, max_tokens=128, temperature=0.3)
                # Extract the first number from the response (run findall once).
                numbers = re.findall(r'\d+\.?\d*', ai_response)
                ai_score = float(numbers[0]) / 10.0 if numbers else 0.0
                # Blend: AI judgment dominates (70%), Jaccard anchors (30%).
                final_score = 0.7 * ai_score + 0.3 * jaccard_score
            except Exception:
                # Best-effort: fall back to pure Jaccard if the model fails.
                final_score = jaccard_score
        else:
            final_score = jaccard_score

        scores.append((idx, final_score))

    scores.sort(key=lambda x: x[1], reverse=True)
    return scores


def load_vocabulary(filepath: str) -> dict:
    """Load a difficulty-keyed vocabulary dictionary from a markdown file.

    Expects ``## Easy`` / ``## Medium`` / ``## Hard`` section headers with
    ``- word`` bullet lines underneath each.
    """
    vocab: Dict[str, List[str]] = {"Easy": [], "Medium": [], "Hard": []}
    current_level = None
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line.startswith("## Easy"):
                current_level = "Easy"
            elif line.startswith("## Medium"):
                current_level = "Medium"
            elif line.startswith("## Hard"):
                current_level = "Hard"
            elif line and current_level and line.startswith("-"):
                word = line.lstrip("- ").strip()
                vocab[current_level].append(word)
    return vocab


def _load_bullet_list(filepath: str) -> List[str]:
    """Parse non-empty ``- item`` bullet lines from a markdown file."""
    items = []
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line and line.startswith("-"):
                item = line.lstrip("- ").strip()
                if item:
                    items.append(item)
    return items


def load_themes(filepath: str) -> List[str]:
    """Load poem themes from a markdown file, with defaults if it is missing."""
    try:
        return _load_bullet_list(filepath)
    except FileNotFoundError:
        return [
            "Nature", "Animals", "Friendship", "Adventure", "Family",
            "Seasons", "Ocean", "Space", "Dreams", "Magic",
        ]


def load_interests(filepath: str) -> List[str]:
    """Load interests/hobbies from a markdown file, with defaults if missing."""
    try:
        return _load_bullet_list(filepath)
    except FileNotFoundError:
        return [
            "Sports", "Music", "Art", "Reading", "Dancing", "Video Games",
            "Dinosaurs", "Superheroes", "Princesses", "Science", "Cooking",
            "Animals", "Cars", "Robots", "Movies", "Swimming",
        ]


def save_leaderboard_score(filepath: str, difficulty: str, player_name: str, score: float):
    """Save a player's score to the JSON leaderboard file.

    Keeps at most the top 100 scores per difficulty, sorted descending.
    """
    # Load the existing leaderboard or start a fresh one.
    if os.path.exists(filepath):
        with open(filepath, 'r', encoding='utf-8') as f:
            leaderboard = json.load(f)
    else:
        leaderboard = {"Easy": [], "Medium": [], "Hard": []}

    # Add the new score under its difficulty (unknown difficulties are ignored).
    if difficulty in leaderboard:
        leaderboard[difficulty].append({"name": player_name, "score": score})
        leaderboard[difficulty].sort(key=lambda x: x["score"], reverse=True)
        leaderboard[difficulty] = leaderboard[difficulty][:100]

    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(leaderboard, f, indent=2)


def get_leaderboard(filepath: str, difficulty: str, top_n: int = 10) -> List[Dict]:
    """Get the top N players from the leaderboard for a difficulty level.

    Returns an empty list if the file is missing, unreadable, or malformed.
    """
    if not os.path.exists(filepath):
        return []
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            leaderboard = json.load(f)
        if difficulty in leaderboard:
            return leaderboard[difficulty][:top_n]
    except (OSError, json.JSONDecodeError):
        # Corrupt or unreadable leaderboard: treat as empty rather than crash.
        return []
    return []