# AI4Poetry/common/utils.py
import random
import re
from typing import List, Tuple, Optional, Dict
import nltk
from nltk.corpus import wordnet as wn
try:
nltk.data.find('corpora/wordnet')
except LookupError:
nltk.download('wordnet', quiet=True)
BANNED_WORDS = {
"fuck", "shit", "damn", "sex", "violence", "kill", "death", "weapon",
"drug", "hate", "stupid", "idiot", "dumb", "ugly", "fat", "racist"
}
UNSAFE_KEYWORDS = {
"violence", "weapon", "gun", "knife", "blood", "murder", "kill", "death",
"sex", "sexual", "porn", "nude", "naked", "drug", "cocaine", "weed",
"hate", "racist", "nazi", "terror", "bomb", "suicide"
}
def check_input_safety(text: str) -> Tuple[bool, str]:
"""Check if input text is safe for children.
Returns:
Tuple of (is_safe: bool, reason: str)
"""
if not text or not text.strip():
return True, ""
text_lower = text.lower()
    # Check for banned words (match at word starts so e.g. "skill" does not trigger "kill")
    for word in BANNED_WORDS:
        if re.search(r'\b' + re.escape(word), text_lower):
            return False, f"Input contains inappropriate word: '{word}'"
    # Check for unsafe keywords
    for keyword in UNSAFE_KEYWORDS:
        if re.search(r'\b' + re.escape(keyword), text_lower):
            return False, f"Input contains unsafe keyword: '{keyword}'"
# Check for excessive caps (yelling)
if len(text) > 10 and sum(1 for c in text if c.isupper()) / len(text) > 0.7:
return False, "Please don't use all caps"
return True, ""
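# Illustrative usage of check_input_safety (the exact rejection message depends on
# which banned/unsafe term matches first):
#     check_input_safety("A poem about my best friend")  # -> (True, "")
#     check_input_safety("a poem about weapons")          # -> (False, "Input contains inappropriate word: 'weapon'")
#     check_input_safety("WRITE ME A POEM NOW PLEASE")    # -> (False, "Please don't use all caps")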
def filter_output(text: str) -> str:
"""Filter and clean poem output from model."""
# Remove common unwanted patterns
text = re.sub(r'(?i)(here is|here\'s).*?(poem|verse).*?:', '', text)
text = re.sub(r'(?i)^(poem|title|verse).*?:', '', text, flags=re.MULTILINE)
text = re.sub(r'\*\*.*?\*\*', '', text) # Remove markdown bold
text = re.sub(r'#{1,6}\s+.*', '', text) # Remove markdown headers
text = re.sub(r'---+', '', text) # Remove separators
text = re.sub(r'##\s+Guidelines.*', '', text, flags=re.DOTALL) # Remove guidelines if leaked
text = re.sub(r'<.*?>', '', text) # Remove any remaining placeholders
# Split into lines and filter
lines = [line.strip() for line in text.split('\n') if line.strip()]
# Remove lines that look like instructions or metadata
poem_lines = []
for line in lines:
lower_line = line.lower()
# Skip instruction-like lines
if any(skip in lower_line for skip in ['guideline', 'parameter', 'instruction', 'format', 'length:', 'age:', 'theme:', 'interest:', 'description:', '- **', 'output only']):
continue
# Skip numbered instruction lines
if re.match(r'^\d+\.\s+\*\*', line):
continue
poem_lines.append(line)
    # Filter banned words line by line so the poem's line breaks are preserved
    filtered_lines = []
    for line in poem_lines:
        filtered_tokens = []
        for token in line.split():
            clean = token.lower().strip('.,!?')
            if clean in BANNED_WORDS:
                filtered_tokens.append("***")
            else:
                filtered_tokens.append(token)
        filtered_lines.append(" ".join(filtered_tokens))
    return "\n".join(filtered_lines)
def load_prompt_template(filepath: str) -> str:
"""Load prompt template from markdown file."""
with open(filepath, 'r', encoding='utf-8') as f:
return f.read()
def fill_prompt_template(template: str, **kwargs) -> str:
"""Replace placeholders like <age>, <theme> with actual values."""
result = template
for key, value in kwargs.items():
placeholder = f"<{key}>"
result = result.replace(placeholder, str(value))
return result
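# Illustrative example of fill_prompt_template with hypothetical placeholder names:
#     fill_prompt_template("Write a <length> poem about <theme>.", length="short", theme="autumn")
#     # -> "Write a short poem about autumn."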
def select_words_to_blank(poem: str, difficulty: str, age: int, model) -> List[str]:
"""Use AI model to intelligently select words to blank based on difficulty and age.
Args:
poem: The complete poem text
difficulty: Easy, Medium, or Hard
age: Reader's age for appropriate word selection
model: The PoetryModel instance
Returns:
List of words to blank out (lowercase)
"""
if difficulty == "Easy":
n_words = 3
instruction = "3 simple, common words that are easy to guess from context"
elif difficulty == "Medium":
n_words = 6
instruction = "6 moderately challenging words with some ambiguity"
else: # Hard
n_words = 9
instruction = "9 key thematic words and challenging vocabulary with high ambiguity"
prompt = f"""Select exactly {n_words} words from this poem to remove for a fill-in-the-blank exercise.
Age: {age} years old
Difficulty: {difficulty}
Poem:
{poem}
Instructions:
- Choose {instruction}
- For {difficulty} difficulty, select words appropriate for age {age}
- Consider context clues and ambiguity level
- Return ONLY the {n_words} words, one per line, nothing else
Selected words:"""
try:
response = model.generate(prompt, max_tokens=512, temperature=0.3)
print(f"\n{'='*60}\nAI WORD SELECTION\n{'='*60}")
print(f"Difficulty: {difficulty} | Age: {age}")
print(f"AI Response:\n{response}")
# Parse the response to extract words
selected = []
for line in response.strip().split('\n'):
word = line.strip().strip('.,!?-*•"\'1234567890. ').lower()
if word and len(word) > 1 and word not in selected:
selected.append(word)
if len(selected) >= n_words:
break
print(f"Parsed words: {selected}")
print(f"{'='*60}\n")
return selected[:n_words] if selected else []
except Exception as e:
print(f"Error selecting words with model: {e}")
return []
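# Illustrative usage of select_words_to_blank; `model` can be any object exposing
# generate(prompt, max_tokens=..., temperature=...), such as the project's PoetryModel.
# The returned words depend entirely on the model's response, e.g.:
#     select_words_to_blank(poem, "Medium", age=8, model=model)
#     # -> e.g. ["whisper", "meadow", "gentle", "dancing", "silver", "breeze"]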
def create_fill_in_blank(poem: str, difficulty: str, selected_words: Optional[List[str]] = None) -> Tuple[str, List[str], List[int], List[str]]:
"""Create fill-in-blank exercise from poem with exact positions.
Args:
poem: The complete poem text
difficulty: Easy (3 blanks), Medium (6 blanks), Hard (9 blanks)
selected_words: Optional list of specific words to blank (from AI model)
Returns:
Tuple of (blanked_poem, correct_answers_in_order, positions, all_poem_words)
"""
words = poem.split()
# Determine number of blanks based on difficulty
if difficulty == "Easy":
n_blanks = 3
elif difficulty == "Medium":
n_blanks = 6
else: # Hard
n_blanks = 9
    # If specific words were provided by the AI model, use those
    blank_indices = []
    if selected_words:
        selected_lower = {w.lower() for w in selected_words}
        for idx, word in enumerate(words):
            clean_word = word.strip('.,!?').lower()
            if clean_word in selected_lower:
                blank_indices.append(idx)
                if len(blank_indices) >= n_blanks:
                    break
# Fallback if AI didn't provide enough words
if len(blank_indices) < n_blanks:
candidates = [i for i, w in enumerate(words) if len(w.strip('.,!?')) > 4 and i not in blank_indices]
if len(candidates) < (n_blanks - len(blank_indices)):
candidates = [i for i, w in enumerate(words) if len(w.strip('.,!?')) > 3 and i not in blank_indices]
additional = random.sample(candidates, min(n_blanks - len(blank_indices), len(candidates)))
blank_indices.extend(additional)
if not blank_indices:
return poem, [], [], words
# Sort positions for consistent ordering
blank_indices = sorted(blank_indices[:n_blanks])
# Store correct answers in order of appearance
correct_answers = []
positions = []
blanked_words = words.copy()
for idx in blank_indices:
original_word = words[idx].strip('.,!?')
correct_answers.append(original_word)
positions.append(idx)
# Replace with uniform blank (15 underscores)
blanked_words[idx] = "_______________"
blanked_poem = " ".join(blanked_words)
return blanked_poem, correct_answers, positions, words
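# Illustrative example of create_fill_in_blank (note the blanked poem is rebuilt
# with spaces; line structure can be recovered from the returned word list and positions):
#     poem = "The silver moon is shining bright\nAbove the quiet sleeping town"
#     blanked, answers, positions, words = create_fill_in_blank(poem, "Easy", ["silver", "shining", "quiet"])
#     # blanked   -> "The _______________ moon is _______________ bright Above the _______________ sleeping town"
#     # answers   -> ["silver", "shining", "quiet"]
#     # positions -> [1, 4, 8]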
def get_word_definition(word: str) -> Optional[str]:
"""Get word definition from WordNet."""
synsets = wn.synsets(word)
if synsets:
return synsets[0].definition().lower()
return None
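# Illustrative example of get_word_definition (requires the WordNet corpus
# downloaded at import time above):
#     get_word_definition("ocean")
#     # -> e.g. "a large body of water constituting a principal part of the hydrosphere"
#     get_word_definition("notarealwordxyz")
#     # -> None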
def rank_definitions(definitions: List[str], correct_definition: str, use_ai: bool = False, model=None) -> List[Tuple[int, float]]:
"""Rank player definitions by similarity to correct definition.
Returns:
List of tuples (player_index, score) sorted by score descending
"""
def jaccard(a: str, b: str) -> float:
a_set = set(a.split())
b_set = set(b.split())
if not a_set or not b_set:
return 0.0
return len(a_set & b_set) / len(a_set | b_set)
scores = []
for idx, definition in enumerate(definitions):
if not definition or not definition.strip():
scores.append((idx, 0.0))
continue
# Calculate Jaccard similarity
jaccard_score = jaccard(definition.lower(), correct_definition.lower())
# If AI scoring is enabled and model is provided
if use_ai and model:
try:
prompt = f"""Rate how well this definition matches the correct definition on a scale of 0-10.
Correct definition: {correct_definition}
Player definition: {definition}
Respond with only a number between 0 and 10."""
                ai_response = model.generate(prompt, max_tokens=128, temperature=0.3)
                # Extract the first number from the response and clamp it to the 0-1 range
                matches = re.findall(r'\d+\.?\d*', ai_response)
                ai_score = min(float(matches[0]) / 10.0, 1.0) if matches else 0.0
                # Combine AI score (70%) and Jaccard score (30%)
                final_score = 0.7 * ai_score + 0.3 * jaccard_score
            except Exception:
                final_score = jaccard_score
else:
final_score = jaccard_score
scores.append((idx, final_score))
scores.sort(key=lambda x: x[1], reverse=True)
return scores
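# Illustrative example of rank_definitions with the default Jaccard-only scoring:
#     rank_definitions(["a big area of salt water", "a type of fish"],
#                      "a large body of salt water")
#     # -> [(0, 0.5), (1, 0.25)]  (best-matching player first)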
def load_vocabulary(filepath: str) -> dict:
"""Load vocabulary dictionary from markdown file."""
vocab = {"Easy": [], "Medium": [], "Hard": []}
current_level = None
with open(filepath, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if line.startswith("## Easy"):
current_level = "Easy"
elif line.startswith("## Medium"):
current_level = "Medium"
elif line.startswith("## Hard"):
current_level = "Hard"
elif line and current_level and line.startswith("-"):
word = line.lstrip("- ").strip()
vocab[current_level].append(word)
return vocab
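# Expected vocabulary file layout (hypothetical path "vocabulary.md"):
#     ## Easy
#     - cat
#     - sun
#     ## Medium
#     - whisper
#     ## Hard
#     - luminous
#
#     load_vocabulary("vocabulary.md")
#     # -> {"Easy": ["cat", "sun"], "Medium": ["whisper"], "Hard": ["luminous"]}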
def load_themes(filepath: str) -> List[str]:
"""Load themes from markdown file."""
themes = []
try:
with open(filepath, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if line and line.startswith("-"):
theme = line.lstrip("- ").strip()
if theme:
themes.append(theme)
except FileNotFoundError:
# Return default themes if file not found
themes = ["Nature", "Animals", "Friendship", "Adventure", "Family", "Seasons", "Ocean", "Space", "Dreams", "Magic"]
return themes
def load_interests(filepath: str) -> List[str]:
"""Load interests/hobbies from markdown file."""
interests = []
try:
with open(filepath, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if line and line.startswith("-"):
interest = line.lstrip("- ").strip()
if interest:
interests.append(interest)
except FileNotFoundError:
# Return default interests if file not found
interests = [
"Sports", "Music", "Art", "Reading", "Dancing", "Video Games",
"Dinosaurs", "Superheroes", "Princesses", "Science", "Cooking",
"Animals", "Cars", "Robots", "Movies", "Swimming"
]
return interests
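# Both loaders expect one "- item" per line (hypothetical paths shown):
#     load_themes("themes.md")       # -> ["Nature", "Animals", ...]
#     load_interests("missing.md")   # -> the default interests list above (file not found)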
def save_leaderboard_score(filepath: str, difficulty: str, player_name: str, score: float):
"""Save a player's score to the leaderboard file."""
import json
import os
# Load existing leaderboard or create new
if os.path.exists(filepath):
with open(filepath, 'r', encoding='utf-8') as f:
leaderboard = json.load(f)
else:
leaderboard = {"Easy": [], "Medium": [], "Hard": []}
# Add new score
if difficulty in leaderboard:
leaderboard[difficulty].append({"name": player_name, "score": score})
# Sort by score descending and keep top 100
leaderboard[difficulty].sort(key=lambda x: x["score"], reverse=True)
leaderboard[difficulty] = leaderboard[difficulty][:100]
# Save back to file
with open(filepath, 'w', encoding='utf-8') as f:
json.dump(leaderboard, f, indent=2)
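# Illustrative example of save_leaderboard_score ("leaderboard.json" is a hypothetical path):
#     save_leaderboard_score("leaderboard.json", "Easy", "Mia", 87.5)
#     # leaderboard.json now holds:
#     # {"Easy": [{"name": "Mia", "score": 87.5}], "Medium": [], "Hard": []}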
def get_leaderboard(filepath: str, difficulty: str, top_n: int = 10) -> List[Dict]:
"""Get top N players from leaderboard for a difficulty level."""
import json
import os
if not os.path.exists(filepath):
return []
try:
with open(filepath, 'r', encoding='utf-8') as f:
leaderboard = json.load(f)
if difficulty in leaderboard:
return leaderboard[difficulty][:top_n]
    except Exception:
        return []
return []
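# Illustrative example of get_leaderboard, continuing the leaderboard.json example above:
#     get_leaderboard("leaderboard.json", "Easy", top_n=3)
#     # -> [{"name": "Mia", "score": 87.5}]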