# Spaces: exaucengarti / AI4Poetry (Sleeping) | File size: 13,966 Bytes | 45c71f5

import random
import re
from typing import List, Tuple, Optional, Dict
import nltk
from nltk.corpus import wordnet as wn

# Make sure the WordNet corpus is available when this module is imported:
# if the lookup fails with LookupError, download it quietly.
try:
    nltk.data.find('corpora/wordnet')
except LookupError:
    nltk.download('wordnet', quiet=True)

# Words that are never acceptable; checked on user input here and masked
# in model output by filter_output.
BANNED_WORDS = {
    "fuck", "shit", "damn", "sex", "violence", "kill", "death", "weapon",
    "drug", "hate", "stupid", "idiot", "dumb", "ugly", "fat", "racist"
}

# Topics that make an input unsuitable; only checked on user input.
UNSAFE_KEYWORDS = {
    "violence", "weapon", "gun", "knife", "blood", "murder", "kill", "death",
    "sex", "sexual", "porn", "nude", "naked", "drug", "cocaine", "weed",
    "hate", "racist", "nazi", "terror", "bomb", "suicide"
}

# Endings accepted after a listed term so that simple inflections
# ("killing", "guns", "hated") are still caught by whole-word matching.
_INFLECTION_SUFFIXES = ("s", "es", "ed", "d", "ing", "er", "ers", "y")


def _find_flagged_term(text_lower: str, terms) -> Optional[str]:
    """Return the first listed term that occurs as a word in *text_lower*.

    A term matches when it appears as a whole word, optionally followed by
    one of ``_INFLECTION_SUFFIXES``. Terms are tried in alphabetical order so
    the reported term does not depend on set iteration order.

    Args:
        text_lower: The text to scan, already lowercased.
        terms: Collection of lowercase terms to look for.

    Returns:
        The matching term, or None when no term occurs.
    """
    suffixes = "|".join(_INFLECTION_SUFFIXES)
    for term in sorted(terms):
        if re.search(rf"\b{re.escape(term)}(?:{suffixes})?\b", text_lower):
            return term
    return None


def check_input_safety(text: str) -> Tuple[bool, str]:
    """Check if input text is safe for children.

    The text is rejected when it contains a banned word, an unsafe keyword,
    or is mostly upper-case. Terms are matched as whole words (plus simple
    inflections) rather than as raw substrings, so harmless words that merely
    contain a listed term ("father", "skill", "whatever") are accepted.
    Compound or derived forms that embed a term in a longer word are not
    matched; add them to the sets explicitly if they must be blocked.

    Args:
        text: Raw user input. Empty or whitespace-only input is treated as safe.

    Returns:
        Tuple of (is_safe: bool, reason: str); reason is "" when safe.
    """
    if not text or not text.strip():
        return True, ""

    text_lower = text.lower()

    # Check for banned words
    banned = _find_flagged_term(text_lower, BANNED_WORDS)
    if banned is not None:
        return False, f"Input contains inappropriate word: '{banned}'"

    # Check for unsafe keywords
    unsafe = _find_flagged_term(text_lower, UNSAFE_KEYWORDS)
    if unsafe is not None:
        return False, f"Input contains unsafe keyword: '{unsafe}'"

    # Check for excessive caps (yelling): more than 70% of all characters
    # are upper-case in a text longer than 10 characters.
    if len(text) > 10 and sum(1 for c in text if c.isupper()) / len(text) > 0.7:
        return False, "Please don't use all caps"

    return True, ""


def filter_output(text: str) -> str:
    """Filter and clean poem output from model.

    Removes boilerplate the model tends to add (intro sentences, titles,
    markdown bold/headers/separators, a leaked "## Guidelines" section,
    ``<...>`` placeholders), drops lines that look like instructions or
    metadata, and masks banned words with "***".

    Args:
        text: Raw text returned by the model.

    Returns:
        The cleaned poem as one string whose tokens are separated by single
        spaces.
    """
    # Remove common unwanted patterns
    text = re.sub(r'(?i)(here is|here\'s).*?(poem|verse).*?:', '', text)
    text = re.sub(r'(?i)^(poem|title|verse).*?:', '', text, flags=re.MULTILINE)
    text = re.sub(r'\*\*.*?\*\*', '', text)  # Remove markdown bold
    # Remove a leaked guidelines section (heading and everything after it).
    # This must run before the generic header removal below: that pattern
    # would delete the "## Guidelines" heading line and leave the body behind.
    text = re.sub(r'##\s+Guidelines.*', '', text, flags=re.DOTALL)
    text = re.sub(r'#{1,6}\s+.*', '', text)  # Remove markdown headers
    text = re.sub(r'---+', '', text)  # Remove separators
    text = re.sub(r'<.*?>', '', text)  # Remove any remaining placeholders

    # Split into lines and filter
    lines = [line.strip() for line in text.split('\n') if line.strip()]

    # Remove lines that look like instructions or metadata
    skip_markers = ['guideline', 'parameter', 'instruction', 'format', 'length:', 'age:', 'theme:', 'interest:', 'description:', '- **', 'output only']
    poem_lines = []
    for line in lines:
        lower_line = line.lower()
        # Skip instruction-like lines
        if any(skip in lower_line for skip in skip_markers):
            continue
        # Skip numbered instruction lines
        if re.match(r'^\d+\.\s+\*\*', line):
            continue
        poem_lines.append(line)

    # Rejoin the clean poem
    result = '\n'.join(poem_lines)

    # Filter banned words. Surrounding punctuation (including quotes,
    # semicolons, colons and brackets) is ignored when comparing.
    # NOTE(review): splitting on whitespace and re-joining with spaces
    # flattens the poem's line breaks; kept as-is because callers may rely
    # on the single-line form -- confirm whether this is intended.
    filtered = []
    for token in result.split():
        clean = token.lower().strip('.,!?;:"\'()[]')
        if clean in BANNED_WORDS:
            filtered.append("***")
        else:
            filtered.append(token)

    return " ".join(filtered)


def load_prompt_template(filepath: str) -> str:
    """Read a prompt template file and return its full text.

    Args:
        filepath: Path of the template file; it is decoded as UTF-8.

    Returns:
        The complete file content as a single string.
    """
    with open(filepath, mode='r', encoding='utf-8') as template_file:
        contents = template_file.read()
    return contents


def fill_prompt_template(template: str, **kwargs) -> str:
    """Replace placeholders like <age>, <theme> with actual values.

    All placeholders are substituted in a single pass over the template, so
    text inserted for one placeholder is never re-scanned: a value that itself
    contains something like "<theme>" is kept literally instead of being
    replaced by another argument.

    Args:
        template: Text containing ``<name>`` placeholders.
        **kwargs: Placeholder names mapped to their values; each value is
            converted with ``str()``.

    Returns:
        The template with every known placeholder replaced. Placeholders
        without a matching keyword argument are left untouched.
    """
    if not kwargs:
        return template

    names = "|".join(re.escape(name) for name in kwargs)
    placeholder = re.compile(f"<({names})>")
    # A function replacement inserts the value verbatim (no backslash or
    # group-reference processing).
    return placeholder.sub(lambda match: str(kwargs[match.group(1)]), template)


def select_words_to_blank(poem: str, difficulty: str, age: int, model) -> List[str]:
    """Use AI model to intelligently select words to blank based on difficulty and age.

    The model's reply is parsed line by line (comma-separated lists on one
    line are also accepted). Only candidates that actually occur in the poem
    are kept, so chatter such as "Here are the words:" cannot use up the
    word quota.

    Args:
        poem: The complete poem text
        difficulty: "Easy" (3 words), "Medium" (6 words); any other value is
            treated as Hard (9 words)
        age: Reader's age for appropriate word selection
        model: Object with a ``generate(prompt, max_tokens=..., temperature=...)``
            method returning the reply text

    Returns:
        List of words to blank out (lowercase, no duplicates, at most the
        number of words for the difficulty). Empty when the model call fails
        or nothing usable is returned.
    """
    if difficulty == "Easy":
        n_words = 3
        instruction = "3 simple, common words that are easy to guess from context"
    elif difficulty == "Medium":
        n_words = 6
        instruction = "6 moderately challenging words with some ambiguity"
    else:  # Hard
        n_words = 9
        instruction = "9 key thematic words and challenging vocabulary with high ambiguity"

    prompt = f"""Select exactly {n_words} words from this poem to remove for a fill-in-the-blank exercise.

Age: {age} years old

Difficulty: {difficulty}



Poem:

{poem}



Instructions:

- Choose {instruction}

- For {difficulty} difficulty, select words appropriate for age {age}

- Consider context clues and ambiguity level

- Return ONLY the {n_words} words, one per line, nothing else



Selected words:"""

    # Words present in the poem, normalised the same way create_fill_in_blank
    # compares them (trailing/leading '.,!?' removed, lowercased).
    poem_vocab = {token.strip('.,!?').lower() for token in poem.split()}
    # Characters stripped from each candidate: list bullets, numbering,
    # quotes, brackets and surrounding punctuation.
    strip_chars = '.,!?-*•"\'1234567890. ()[]:;'

    try:
        response = model.generate(prompt, max_tokens=512, temperature=0.3)
        print(f"\n{'='*60}\nAI WORD SELECTION\n{'='*60}")
        print(f"Difficulty: {difficulty} | Age: {age}")
        print(f"AI Response:\n{response}")

        # Parse the response to extract words
        selected = []
        for line in response.strip().split('\n'):
            for chunk in line.split(','):
                word = chunk.strip().strip(strip_chars).lower()
                if len(word) > 1 and word in poem_vocab and word not in selected:
                    selected.append(word)
            if len(selected) >= n_words:
                break

        selected = selected[:n_words]
        print(f"Parsed words: {selected}")
        print(f"{'='*60}\n")
        return selected
    except Exception as e:
        # Best effort: callers fall back to their own word choice on [].
        print(f"Error selecting words with model: {e}")
        return []


def create_fill_in_blank(poem: str, difficulty: str, selected_words: Optional[List[str]] = None) -> Tuple[str, List[str], List[int], List[str]]:
    """Create fill-in-blank exercise from poem with exact positions.

    When ``selected_words`` is given, the first occurrence of each selected
    word is blanked before any repeated occurrence, so one frequent word
    cannot use up all blanks. If that still leaves blanks unfilled, random
    longer words (more than 4 characters, or more than 3 if too few) are
    added.

    Args:
        poem: The complete poem text
        difficulty: "Easy" (3 blanks), "Medium" (6 blanks); any other value
            is treated as Hard (9 blanks)
        selected_words: Optional list of specific words to blank (from AI
            model); compared case-insensitively, ignoring '.,!?' around
            poem words

    Returns:
        Tuple of (blanked_poem, correct_answers_in_order, positions, all_poem_words).
        The poem is split on whitespace; positions index into
        all_poem_words and are sorted ascending. When nothing can be
        blanked, the original poem and empty lists are returned.
    """
    words = poem.split()

    # Determine number of blanks based on difficulty
    if difficulty == "Easy":
        n_blanks = 3
    elif difficulty == "Medium":
        n_blanks = 6
    else:  # Hard
        n_blanks = 9

    # If specific words provided by AI model, use those
    blank_indices = []
    if selected_words:
        wanted = {w.lower() for w in selected_words}
        seen = set()
        first_hits = []
        repeats = []
        for idx, word in enumerate(words):
            clean_word = word.strip('.,!?').lower()
            if clean_word not in wanted:
                continue
            if clean_word in seen:
                repeats.append(idx)
            else:
                seen.add(clean_word)
                first_hits.append(idx)
        # Cover every selected word once before blanking repeats.
        blank_indices = (first_hits + repeats)[:n_blanks]

    # Fallback if AI didn't provide enough words
    if len(blank_indices) < n_blanks:
        taken = set(blank_indices)
        missing = n_blanks - len(blank_indices)
        candidates = [i for i, w in enumerate(words) if len(w.strip('.,!?')) > 4 and i not in taken]
        if len(candidates) < missing:
            candidates = [i for i, w in enumerate(words) if len(w.strip('.,!?')) > 3 and i not in taken]
        blank_indices.extend(random.sample(candidates, min(missing, len(candidates))))

    if not blank_indices:
        return poem, [], [], words

    # Sort positions for consistent ordering
    blank_indices = sorted(blank_indices[:n_blanks])

    # Store correct answers in order of appearance
    correct_answers = []
    positions = []
    blanked_words = words.copy()

    for idx in blank_indices:
        correct_answers.append(words[idx].strip('.,!?'))
        positions.append(idx)
        # Replace with uniform blank (15 underscores)
        blanked_words[idx] = "_______________"

    blanked_poem = " ".join(blanked_words)

    return blanked_poem, correct_answers, positions, words


def get_word_definition(word: str) -> Optional[str]:
    """Look up *word* in WordNet and return a lowercased definition.

    Args:
        word: The word to look up.

    Returns:
        The definition of the first synset returned by WordNet, lowercased,
        or None when WordNet returns no synsets for the word.
    """
    matches = wn.synsets(word)
    if not matches:
        return None
    return matches[0].definition().lower()


def rank_definitions(definitions: List[str], correct_definition: str, use_ai: bool = False, model = None) -> List[Tuple[int, float]]:
    """Rank player definitions by similarity to correct definition.

    Each definition gets a Jaccard word-overlap score against the correct
    definition. With ``use_ai`` and a model, the model is asked for a 0-10
    rating, which is scaled to 0-1, clamped to that range and blended
    (70% AI, 30% Jaccard). If the model call fails, the Jaccard score alone
    is used for that definition.

    Args:
        definitions: Player definitions, indexed by player.
        correct_definition: Reference definition; None or "" is treated as
            an empty reference (every Jaccard score is 0.0).
        use_ai: Whether to ask the model for a rating.
        model: Object with a ``generate(prompt, max_tokens=..., temperature=...)``
            method returning the reply text; ignored when ``use_ai`` is False.

    Returns:
        List of tuples (player_index, score) sorted by score descending.
        Empty or whitespace-only definitions score 0.0.
    """
    def jaccard(a: str, b: str) -> float:
        a_set = set(a.split())
        b_set = set(b.split())
        if not a_set or not b_set:
            return 0.0
        return len(a_set & b_set) / len(a_set | b_set)

    # Tolerate a missing reference definition instead of crashing on .lower().
    correct_definition = correct_definition or ""
    reference = correct_definition.lower()

    scores = []
    for idx, definition in enumerate(definitions):
        if not definition or not definition.strip():
            scores.append((idx, 0.0))
            continue

        # Calculate Jaccard similarity
        jaccard_score = jaccard(definition.lower(), reference)
        final_score = jaccard_score

        # If AI scoring is enabled and model is provided
        if use_ai and model:
            try:
                prompt = f"""Rate how well this definition matches the correct definition on a scale of 0-10.



Correct definition: {correct_definition}

Player definition: {definition}



Respond with only a number between 0 and 10."""

                ai_response = model.generate(prompt, max_tokens=128, temperature=0.3)
                # Extract the first number from the response (0.0 if none)
                number = re.search(r'\d+\.?\d*', ai_response)
                ai_score = float(number.group()) / 10.0 if number else 0.0
                # The model may answer outside 0-10; keep the score in [0, 1]
                ai_score = min(max(ai_score, 0.0), 1.0)
                # Combine AI score (70%) and Jaccard score (30%)
                final_score = 0.7 * ai_score + 0.3 * jaccard_score
            except Exception as e:
                # Best effort: fall back to the Jaccard score for this entry.
                print(f"Error scoring definition with model: {e}")
                final_score = jaccard_score

        scores.append((idx, final_score))

    scores.sort(key=lambda x: x[1], reverse=True)
    return scores


def load_vocabulary(filepath: str) -> dict:
    """Parse a markdown vocabulary file into per-difficulty word lists.

    A line starting with "## Easy", "## Medium" or "## Hard" switches the
    current level; every following line that starts with "-" is added to
    that level with its leading dashes/spaces removed. Bullet lines before
    the first level heading are ignored.

    Args:
        filepath: Path of the markdown file; it is decoded as UTF-8.

    Returns:
        Dict with the keys "Easy", "Medium" and "Hard", each mapping to the
        list of words found under that heading.
    """
    levels = ("Easy", "Medium", "Hard")
    vocab = {level: [] for level in levels}
    active_level = None

    with open(filepath, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            heading = next(
                (level for level in levels if entry.startswith("## " + level)),
                None,
            )
            if heading is not None:
                active_level = heading
            elif entry and active_level and entry.startswith("-"):
                vocab[active_level].append(entry.lstrip("- ").strip())

    return vocab


def load_themes(filepath: str) -> List[str]:
    """Read the list of themes from a markdown bullet list.

    Every line starting with "-" contributes one theme (leading dashes and
    spaces removed); bullets that end up empty are skipped.

    Args:
        filepath: Path of the markdown file; it is decoded as UTF-8.

    Returns:
        The themes found in the file, or a built-in default list when the
        file does not exist.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            stripped_rows = (row.strip() for row in handle)
            names = (row.lstrip("- ").strip() for row in stripped_rows if row.startswith("-"))
            return [name for name in names if name]
    except FileNotFoundError:
        # Return default themes if file not found
        return ["Nature", "Animals", "Friendship", "Adventure", "Family", "Seasons", "Ocean", "Space", "Dreams", "Magic"]


def load_interests(filepath: str) -> List[str]:
    """Read the list of interests/hobbies from a markdown bullet list.

    Every line starting with "-" contributes one interest (leading dashes
    and spaces removed); bullets that end up empty are skipped.

    Args:
        filepath: Path of the markdown file; it is decoded as UTF-8.

    Returns:
        The interests found in the file, or a built-in default list when the
        file does not exist.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            stripped_rows = (row.strip() for row in handle)
            names = (row.lstrip("- ").strip() for row in stripped_rows if row.startswith("-"))
            return [name for name in names if name]
    except FileNotFoundError:
        # Return default interests if file not found
        return [
            "Sports", "Music", "Art", "Reading", "Dancing", "Video Games",
            "Dinosaurs", "Superheroes", "Princesses", "Science", "Cooking",
            "Animals", "Cars", "Robots", "Movies", "Swimming",
        ]


def save_leaderboard_score(filepath: str, difficulty: str, player_name: str, score: float):
    """Save a player's score to the leaderboard file.

    The leaderboard is a JSON object mapping a difficulty level to a list of
    ``{"name": ..., "score": ...}`` entries, sorted by score descending and
    capped at 100 entries per level. A missing file, or one that does not
    contain a JSON object, is treated as an empty leaderboard, and the
    levels "Easy", "Medium" and "Hard" always exist, so a score for one of
    them is never dropped because the stored file lacked that key.

    Args:
        filepath: Path of the JSON leaderboard file (created if missing).
        difficulty: Level to record the score under; a level that is neither
            standard nor already present in the file is ignored.
        player_name: Name stored with the score.
        score: The player's score.
    """
    import json
    import os

    leaderboard = {"Easy": [], "Medium": [], "Hard": []}

    # Load existing leaderboard on top of the empty default
    if os.path.exists(filepath):
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                stored = json.load(f)
        except ValueError as e:
            # json.JSONDecodeError is a ValueError: start over rather than
            # failing to record the score.
            print(f"Leaderboard file unreadable, starting fresh: {e}")
            stored = None
        if isinstance(stored, dict):
            leaderboard.update(stored)

    # Add new score
    if difficulty in leaderboard:
        entries = leaderboard[difficulty]
        if not isinstance(entries, list):
            entries = []
        entries.append({"name": player_name, "score": score})
        # Sort by score descending and keep top 100
        entries.sort(key=lambda x: x["score"], reverse=True)
        leaderboard[difficulty] = entries[:100]

    # Save back to file
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(leaderboard, f, indent=2)


def get_leaderboard(filepath: str, difficulty: str, top_n: int = 10) -> List[Dict]:
    """Get top N players from leaderboard for a difficulty level.

    Args:
        filepath: Path of the JSON leaderboard file.
        difficulty: Level whose entries are requested.
        top_n: Maximum number of entries to return.

    Returns:
        The first ``top_n`` stored entries for the level. An empty list is
        returned when the file is missing or unreadable, is not valid JSON,
        has an unexpected structure, or has no entry for the level.
    """
    import json

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            leaderboard = json.load(f)

        if difficulty in leaderboard:
            return leaderboard[difficulty][:top_n]
    except (OSError, ValueError, TypeError, KeyError):
        # OSError: missing/unreadable file; ValueError: invalid JSON or
        # encoding; TypeError/KeyError: unexpected JSON structure.
        return []

    return []