Spaces:
Sleeping
Sleeping
| import random | |
| import re | |
| from typing import List, Tuple, Optional, Dict | |
| import nltk | |
| from nltk.corpus import wordnet as wn | |
# Make sure the WordNet corpus used by the definition lookup is installed.
# nltk.data.find raises LookupError when the resource is missing; in that case
# the corpus is downloaded quietly at import time.
try:
    nltk.data.find('corpora/wordnet')
except LookupError:
    nltk.download('wordnet', quiet=True)
# Words that must never appear in user input or in generated poems.
BANNED_WORDS = {
    "fuck", "shit", "damn", "sex", "violence", "kill", "death", "weapon",
    "drug", "hate", "stupid", "idiot", "dumb", "ugly", "fat", "racist"
}

# Topics that make a user request unsuitable for children.
UNSAFE_KEYWORDS = {
    "violence", "weapon", "gun", "knife", "blood", "murder", "kill", "death",
    "sex", "sexual", "porn", "nude", "naked", "drug", "cocaine", "weed",
    "hate", "racist", "nazi", "terror", "bomb", "suicide"
}


def _first_flagged_word(text_lower: str, vocabulary) -> Optional[str]:
    """Return the first entry of *vocabulary* that occurs as a word in the text.

    Matching is anchored at the start of a word so that innocent words which
    merely contain a listed word ("skill", "whatever", "begun") are not flagged.
    Entries of three letters or fewer ("fat", "sex", "gun") additionally have
    to end the word, apart from a plural or "-y" ending, because they begin
    too many harmless words ("father", "gunwale"). Longer entries match as a
    prefix so derivatives ("killing", "terrorist") are still caught.

    The vocabulary is scanned in sorted order so the reported word does not
    depend on set iteration order.
    """
    for word in sorted(vocabulary):
        if len(word) <= 3:
            pattern = r"\b" + re.escape(word) + r"(?:e?s|y)?\b"
        else:
            pattern = r"\b" + re.escape(word)
        if re.search(pattern, text_lower):
            return word
    return None


def check_input_safety(text: str) -> Tuple[bool, str]:
    """Check if input text is safe for children.

    The text is rejected when it contains a banned word, an unsafe keyword, or
    is written mostly in capital letters. Empty or whitespace-only input is
    considered safe.

    Note: words are matched from their first letter, so a listed word embedded
    in the middle of a longer word is not detected.

    Args:
        text: Raw user input.

    Returns:
        Tuple of (is_safe: bool, reason: str). The reason is an empty string
        when the input is safe.
    """
    if not text or not text.strip():
        return True, ""

    text_lower = text.lower()

    # Check for banned words
    banned = _first_flagged_word(text_lower, BANNED_WORDS)
    if banned is not None:
        return False, f"Input contains inappropriate word: '{banned}'"

    # Check for unsafe keywords
    unsafe = _first_flagged_word(text_lower, UNSAFE_KEYWORDS)
    if unsafe is not None:
        return False, f"Input contains unsafe keyword: '{unsafe}'"

    # Check for excessive caps (yelling); short inputs are exempt so that
    # acronyms and single words are not rejected.
    if len(text) > 10 and sum(1 for c in text if c.isupper()) / len(text) > 0.7:
        return False, "Please don't use all caps"

    return True, ""
def filter_output(text: str) -> str:
    """Filter and clean poem output from model.

    Strips prompt leakage and markdown decoration from the raw model text,
    drops lines that look like instructions or metadata, and replaces banned
    words with "***". Line breaks between the surviving poem lines are kept;
    whitespace inside a line is collapsed to single spaces.

    Args:
        text: Raw text produced by the model.

    Returns:
        The cleaned poem, one poem line per output line (possibly empty).
    """
    # Remove a leaked guidelines section first. This has to run before the
    # generic header removal below, which would otherwise delete the
    # "## Guidelines" heading and leave the guideline body in the poem.
    text = re.sub(r'##\s+Guidelines.*', '', text, flags=re.DOTALL)
    # Remove common unwanted patterns
    text = re.sub(r'(?i)(here is|here\'s).*?(poem|verse).*?:', '', text)
    text = re.sub(r'(?i)^(poem|title|verse).*?:', '', text, flags=re.MULTILINE)
    text = re.sub(r'\*\*.*?\*\*', '', text)  # Remove markdown bold
    text = re.sub(r'#{1,6}\s+.*', '', text)  # Remove markdown headers
    text = re.sub(r'---+', '', text)  # Remove separators
    text = re.sub(r'<.*?>', '', text)  # Remove any remaining placeholders

    # Substrings that mark a line as instructions or metadata, not poem text
    skip_markers = (
        'guideline', 'parameter', 'instruction', 'format', 'length:', 'age:',
        'theme:', 'interest:', 'description:', '- **', 'output only',
    )
    # Punctuation ignored around a token when comparing it to the banned list
    edge_punctuation = '.,!?;:"\'()'

    poem_lines = []
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        lower_line = line.lower()
        # Skip instruction-like lines
        if any(marker in lower_line for marker in skip_markers):
            continue
        # Skip numbered instruction lines
        if re.match(r'^\d+\.\s+\*\*', line):
            continue
        # Filter banned words token by token, keeping the line intact
        censored = [
            "***" if token.lower().strip(edge_punctuation) in BANNED_WORDS else token
            for token in line.split()
        ]
        poem_lines.append(" ".join(censored))

    return "\n".join(poem_lines)
def load_prompt_template(filepath: str) -> str:
    """Read a markdown prompt template from disk and return its full text.

    The file is decoded as UTF-8. Errors raised while opening or reading the
    file propagate to the caller.
    """
    with open(filepath, mode='r', encoding='utf-8') as template_file:
        contents = template_file.read()
    return contents
def fill_prompt_template(template: str, **kwargs) -> str:
    """Substitute ``<name>`` placeholders in a template with keyword values.

    Each keyword argument ``name=value`` replaces every occurrence of
    ``<name>`` with ``str(value)``. Substitutions are applied one after
    another in keyword order; placeholders without a matching keyword are
    left untouched.
    """
    filled = template
    for name in kwargs:
        filled = filled.replace("<" + name + ">", str(kwargs[name]))
    return filled
def select_words_to_blank(poem: str, difficulty: str, age: int, model) -> List[str]:
    """Use AI model to intelligently select words to blank based on difficulty and age.

    The model is asked for one word per line. Each reply line is stripped of
    list markers, numbering and punctuation, and is kept only if it is an
    actual word of the poem. This stops chatter such as "Here are the words:"
    from using up the word quota and pushing real selections out.

    Args:
        poem: The complete poem text
        difficulty: Easy, Medium, or Hard (any other value is treated as Hard)
        age: Reader's age for appropriate word selection
        model: Object exposing ``generate(prompt, max_tokens=..., temperature=...)``
            that returns the reply text

    Returns:
        List of words to blank out (lowercase), at most 3/6/9 entries for
        Easy/Medium/Hard. Empty when the model call fails or yields no
        usable word.
    """
    if difficulty == "Easy":
        n_words = 3
        instruction = "3 simple, common words that are easy to guess from context"
    elif difficulty == "Medium":
        n_words = 6
        instruction = "6 moderately challenging words with some ambiguity"
    else:  # Hard
        n_words = 9
        instruction = "9 key thematic words and challenging vocabulary with high ambiguity"

    prompt = f"""Select exactly {n_words} words from this poem to remove for a fill-in-the-blank exercise.
Age: {age} years old
Difficulty: {difficulty}
Poem:
{poem}
Instructions:
- Choose {instruction}
- For {difficulty} difficulty, select words appropriate for age {age}
- Consider context clues and ambiguity level
- Return ONLY the {n_words} words, one per line, nothing else
Selected words:"""

    # Words that really occur in the poem, normalised the same way as the
    # parsed reply lines (lowercase, surrounding punctuation removed).
    poem_vocabulary = {token.strip('.,!?;:"\'()-').lower() for token in poem.split()}

    try:
        response = model.generate(prompt, max_tokens=512, temperature=0.3)
        print(f"\n{'='*60}\nAI WORD SELECTION\n{'='*60}")
        print(f"Difficulty: {difficulty} | Age: {age}")
        print(f"AI Response:\n{response}")

        # Parse the response to extract words
        selected = []
        for line in response.strip().split('\n'):
            # Drop bullets, numbering and quotes around the candidate word
            word = line.strip().strip('.,!?-*•"\'1234567890. ').lower()
            if len(word) <= 1 or word in selected:
                continue
            if word not in poem_vocabulary:
                # Preamble or explanation, or a word the poem does not contain
                continue
            selected.append(word)
            if len(selected) >= n_words:
                break

        print(f"Parsed words: {selected}")
        print(f"{'='*60}\n")
        return selected
    except Exception as e:
        # Best effort: callers fall back to random selection on an empty list
        print(f"Error selecting words with model: {e}")
        return []
def create_fill_in_blank(poem: str, difficulty: str, selected_words: List[str] = None) -> Tuple[str, List[str], List[int], List[str]]:
    """Turn a poem into a fill-in-the-blank exercise.

    The poem is split on whitespace. Words matching ``selected_words``
    (case-insensitive, ignoring surrounding ``.,!?``) are blanked first, in
    order of appearance. If that yields fewer blanks than the difficulty
    requires, the remainder is drawn at random from longer words.

    Args:
        poem: The complete poem text
        difficulty: "Easy" gives 3 blanks, "Medium" gives 6, anything else 9
        selected_words: Optional words to blank (typically chosen by the AI model)

    Returns:
        Tuple of (blanked_poem, correct_answers_in_order, positions, all_poem_words).
        When nothing can be blanked the poem is returned unchanged with empty
        answer and position lists.
    """
    n_blanks = 3 if difficulty == "Easy" else 6 if difficulty == "Medium" else 9
    words = poem.split()

    # First pass: positions of the explicitly requested words
    chosen = []
    if selected_words:
        wanted = {candidate.lower() for candidate in selected_words}
        for position, token in enumerate(words):
            if token.strip('.,!?').lower() in wanted:
                chosen.append(position)
            if len(chosen) >= n_blanks:
                break

    # Second pass: top up with random longer words when still short
    missing = n_blanks - len(chosen)
    if missing > 0:
        def eligible(min_length):
            return [
                position for position, token in enumerate(words)
                if len(token.strip('.,!?')) > min_length and position not in chosen
            ]

        pool = eligible(4)
        if len(pool) < missing:
            # Not enough long words; relax the length requirement
            pool = eligible(3)
        chosen.extend(random.sample(pool, min(missing, len(pool))))

    if not chosen:
        return poem, [], [], words

    # Answers are reported in reading order
    positions = sorted(chosen[:n_blanks])
    correct_answers = [words[position].strip('.,!?') for position in positions]

    blanked_words = list(words)
    for position in positions:
        # Uniform-width blank so the gap does not reveal the word length
        blanked_words[position] = "_______________"

    return " ".join(blanked_words), correct_answers, positions, words
def get_word_definition(word: str) -> Optional[str]:
    """Look up *word* in WordNet and return the definition of its first synset.

    The definition is returned in lowercase. ``None`` is returned when WordNet
    has no synset for the word.
    """
    matches = wn.synsets(word)
    if not matches:
        return None
    first_sense = matches[0]
    return first_sense.definition().lower()
def rank_definitions(definitions: List[str], correct_definition: str, use_ai: bool = False, model = None) -> List[Tuple[int, float]]:
    """Rank player definitions by similarity to correct definition.

    Every definition gets a Jaccard word-overlap score against the correct
    definition. When ``use_ai`` is set and a model is supplied, the model is
    also asked for a 0-10 rating, and the final score is 70% AI rating plus
    30% Jaccard. If the model call fails for a definition, that definition
    falls back to its Jaccard score.

    Args:
        definitions: One definition per player; blank or None entries score 0.0
        correct_definition: Reference definition. If it is None or blank there
            is nothing to compare against and every player scores 0.0
        use_ai: Whether to blend in a model-based rating
        model: Object exposing ``generate(prompt, max_tokens=..., temperature=...)``

    Returns:
        List of tuples (player_index, score) sorted by score descending;
        ties keep their original player order.
    """
    def jaccard(a: str, b: str) -> float:
        a_set = set(a.split())
        b_set = set(b.split())
        if not a_set or not b_set:
            return 0.0
        return len(a_set & b_set) / len(a_set | b_set)

    reference = (correct_definition or "").lower()
    if not reference.strip():
        # No reference definition is available (e.g. the dictionary lookup
        # returned None), so no player can be scored.
        return [(idx, 0.0) for idx in range(len(definitions))]

    scores = []
    for idx, definition in enumerate(definitions):
        if not definition or not definition.strip():
            scores.append((idx, 0.0))
            continue

        # Calculate Jaccard similarity
        jaccard_score = jaccard(definition.lower(), reference)
        final_score = jaccard_score

        # If AI scoring is enabled and model is provided
        if use_ai and model:
            try:
                prompt = f"""Rate how well this definition matches the correct definition on a scale of 0-10.
Correct definition: {correct_definition}
Player definition: {definition}
Respond with only a number between 0 and 10."""
                ai_response = model.generate(prompt, max_tokens=128, temperature=0.3)
                # Extract the first number from the response
                number = re.search(r'\d+\.?\d*', ai_response)
                ai_score = float(number.group()) / 10.0 if number else 0.0
                # The model may answer outside the requested range ("85",
                # "100%"); clamp so one reply cannot dominate the ranking.
                ai_score = min(max(ai_score, 0.0), 1.0)
                # Combine AI score (70%) and Jaccard score (30%)
                final_score = 0.7 * ai_score + 0.3 * jaccard_score
            except Exception as e:
                # Best effort: keep the Jaccard score if the model misbehaves
                print(f"Error scoring definition with model: {e}")
                final_score = jaccard_score

        scores.append((idx, final_score))

    scores.sort(key=lambda item: item[1], reverse=True)
    return scores
def load_vocabulary(filepath: str) -> dict:
    """Load vocabulary dictionary from markdown file.

    The file is scanned line by line. A line starting with ``## Easy``,
    ``## Medium`` or ``## Hard`` switches the current level. Every later line
    starting with ``-`` adds its text (leading dashes and spaces removed) to
    that level. Bullets before the first level heading and bullets with no
    text are ignored.

    Args:
        filepath: Path to the UTF-8 encoded markdown file.

    Returns:
        Dict with the keys "Easy", "Medium" and "Hard", each mapping to the
        list of words found for that level, in file order.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
    """
    vocab = {"Easy": [], "Medium": [], "Hard": []}
    current_level = None
    with open(filepath, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            heading = next(
                (level for level in vocab if line.startswith(f"## {level}")),
                None,
            )
            if heading is not None:
                current_level = heading
            elif current_level and line.startswith("-"):
                word = line.lstrip("- ").strip()
                # A bare "-" bullet carries no word; skip it so an empty
                # string never ends up in the vocabulary.
                if word:
                    vocab[current_level].append(word)
    return vocab
def load_themes(filepath: str) -> List[str]:
    """Read poem themes from the bullet lines of a markdown file.

    Every line that starts with ``-`` contributes one theme (leading dashes
    and spaces removed); bullets without text are skipped. If the file does
    not exist, a built-in default list of themes is returned instead.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            rows = [row.strip() for row in handle]
    except FileNotFoundError:
        # Return default themes if file not found
        return ["Nature", "Animals", "Friendship", "Adventure", "Family", "Seasons", "Ocean", "Space", "Dreams", "Magic"]

    themes = []
    for row in rows:
        if not row.startswith("-"):
            continue
        name = row.lstrip("- ").strip()
        if name:
            themes.append(name)
    return themes
def load_interests(filepath: str) -> List[str]:
    """Read interests/hobbies from the bullet lines of a markdown file.

    Every line that starts with ``-`` contributes one interest (leading
    dashes and spaces removed); bullets without text are skipped. If the file
    does not exist, a built-in default list of interests is returned instead.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            rows = [row.strip() for row in handle]
    except FileNotFoundError:
        # Return default interests if file not found
        return [
            "Sports", "Music", "Art", "Reading", "Dancing", "Video Games",
            "Dinosaurs", "Superheroes", "Princesses", "Science", "Cooking",
            "Animals", "Cars", "Robots", "Movies", "Swimming"
        ]

    interests = []
    for row in rows:
        if not row.startswith("-"):
            continue
        label = row.lstrip("- ").strip()
        if label:
            interests.append(label)
    return interests
def save_leaderboard_score(filepath: str, difficulty: str, player_name: str, score: float):
    """Save a player's score to the leaderboard file.

    The leaderboard is a JSON object mapping a difficulty name to a list of
    ``{"name": ..., "score": ...}`` entries, sorted by score descending and
    capped at 100 entries per difficulty.

    A missing, empty or unparsable file is treated as an empty leaderboard,
    so a damaged file is replaced rather than blocking every later save. The
    standard levels "Easy", "Medium" and "Hard" are always present in the
    written file. A difficulty that is not a key of the leaderboard is
    ignored.

    Args:
        filepath: Path of the JSON leaderboard file (created if missing).
        difficulty: Difficulty level the score belongs to.
        player_name: Name stored with the score.
        score: The player's score.
    """
    import json
    import os

    # Load existing leaderboard or create new
    leaderboard = None
    if os.path.exists(filepath):
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                leaderboard = json.load(f)
        except ValueError:
            # Empty or corrupt JSON (JSONDecodeError is a ValueError)
            leaderboard = None
    if not isinstance(leaderboard, dict):
        leaderboard = {}

    # Older or hand-edited files may lack a level; without this the score
    # for that level would be dropped silently.
    for level in ("Easy", "Medium", "Hard"):
        if not isinstance(leaderboard.get(level), list):
            leaderboard[level] = []

    # Add new score
    if difficulty in leaderboard and isinstance(leaderboard[difficulty], list):
        entries = leaderboard[difficulty]
        entries.append({"name": player_name, "score": score})
        # Sort by score descending and keep top 100
        entries.sort(key=lambda entry: entry["score"], reverse=True)
        leaderboard[difficulty] = entries[:100]

    # Save back to file
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(leaderboard, f, indent=2)
def get_leaderboard(filepath: str, difficulty: str, top_n: int = 10) -> List[Dict]:
    """Get top N players from leaderboard for a difficulty level.

    Args:
        filepath: Path of the JSON leaderboard file.
        difficulty: Difficulty level to read.
        top_n: Maximum number of entries to return.

    Returns:
        The first ``top_n`` entries stored for the difficulty, in file order.
        An empty list is returned when the file is missing, unreadable, not
        valid JSON, or holds no list for that difficulty.
    """
    import json
    import os

    if not os.path.exists(filepath):
        return []

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            leaderboard = json.load(f)
    except (OSError, ValueError):
        # Unreadable file or malformed JSON: show an empty leaderboard
        # instead of failing. JSONDecodeError is a ValueError.
        return []

    if not isinstance(leaderboard, dict):
        return []
    entries = leaderboard.get(difficulty)
    if not isinstance(entries, list):
        return []
    return entries[:top_n]