# src/app/flashcards_tools.py
"""Per-user flashcard deck storage (JSON) and vocab-deck generation.

Decks are stored as ``<user dir>/decks/<deck name>.json``. Each deck is a JSON
object with the keys ``"name"``, ``"cards"`` and ``"tags"``; each card carries
the simple spaced-repetition counters ``"score"`` and ``"reviews"``.
"""

import json
import logging
import re
from pathlib import Path
from typing import Dict, List, Tuple, Optional

from deep_translator import GoogleTranslator

from .config import get_user_dir

logger = logging.getLogger(__name__)

# Compiled once: used for every tokenization call.
_WORD_RE = re.compile(r"\b\w+\b", flags=re.UNICODE)

# Path separators that must never reach the filesystem via a deck name.
_PATH_SEPARATORS_RE = re.compile(r"[\\/]")

# Failures that mean "this deck file cannot be used" rather than a bug:
# I/O errors, invalid JSON / invalid UTF-8 (both ValueError subclasses),
# and pathologically nested JSON.
_DECK_READ_ERRORS = (OSError, ValueError, RecursionError)


def _get_decks_dir(username: str) -> Path:
    """
    Return the directory where all of a user's decks are stored.

    The directory (``<user dir>/decks``) is created if it does not exist yet.
    """
    decks_dir = get_user_dir(username) / "decks"
    decks_dir.mkdir(parents=True, exist_ok=True)
    return decks_dir


def _read_deck_json(path: Path) -> Dict:
    """
    Read ``path`` as a JSON object, best-effort.

    Returns an empty dict when the file is missing, unreadable, not valid
    JSON, or valid JSON whose top level is not an object. Every case except
    a plainly missing file is logged, because silently treating a corrupt
    deck as empty makes later data loss hard to diagnose.
    """
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return {}
    except _DECK_READ_ERRORS as exc:
        logger.warning("Could not read deck file %s: %s", path, exc)
        return {}
    if not isinstance(data, dict):
        logger.warning("Deck file %s does not contain a JSON object", path)
        return {}
    return data


def list_user_decks(username: str) -> Dict[str, Path]:
    """
    Return a mapping of deck name -> deck JSON path for ``username``.

    The deck name is the deck's ``"name"`` field when it is a non-empty
    string, otherwise the filename stem. Files are visited in sorted order.
    When two decks share a name, the later one is listed as
    ``"<name> (<stem>)"``; if that label is taken as well, a numeric suffix
    is appended so that no deck is ever dropped from the mapping.
    """
    decks: Dict[str, Path] = {}
    for path in sorted(_get_decks_dir(username).glob("*.json")):
        raw_name = _read_deck_json(path).get("name")
        # A non-string name (number, list, ...) cannot serve as a label and
        # may not even be hashable, so fall back to the filename stem.
        base = raw_name if isinstance(raw_name, str) and raw_name else path.stem

        name = base
        if name in decks:
            name = f"{base} ({path.stem})"
            counter = 2
            while name in decks:
                name = f"{base} ({path.stem}) [{counter}]"
                counter += 1
        decks[name] = path
    return decks


def _ensure_card_stats(card: Dict) -> None:
    """
    Ensure that a card has simple spaced-repetition stats.

    Adds ``"score"`` (learning strength) and ``"reviews"`` with a value of 0
    when missing; existing values are left untouched. Mutates ``card``.
    """
    card.setdefault("score", 0)
    card.setdefault("reviews", 0)


def load_deck(path: Path) -> Dict:
    """
    Load a deck from JSON and normalize its structure.

    The returned dict always has a ``"cards"`` list, a ``"name"`` (defaulting
    to the filename stem) and a ``"tags"`` list, and every card has the basic
    spaced-repetition stats. A missing, unreadable or malformed file yields
    an empty deck instead of raising. Entries in ``"cards"`` that are not
    JSON objects are dropped, since they cannot carry stats.
    """
    data = _read_deck_json(path)

    raw_cards = data.get("cards")
    if isinstance(raw_cards, list):
        cards = [card for card in raw_cards if isinstance(card, dict)]
        if len(cards) != len(raw_cards):
            logger.warning(
                "Dropped %d malformed card(s) from deck file %s",
                len(raw_cards) - len(cards),
                path,
            )
    else:
        cards = []
    data["cards"] = cards

    if "name" not in data:
        data["name"] = path.stem
    if not isinstance(data.get("tags"), list):
        data["tags"] = []

    for card in cards:
        _ensure_card_stats(card)
    return data


def save_deck(path: Path, deck: Dict) -> None:
    """
    Save ``deck`` to ``path`` as UTF-8 JSON (indented, non-ASCII preserved).

    Missing ``"cards"`` / ``"name"`` keys and a missing or non-list
    ``"tags"`` are filled in, and every card receives its stats, before
    writing. Note that this normalization mutates ``deck`` in place. An
    existing file at ``path`` is overwritten.
    """
    deck.setdefault("cards", [])
    deck.setdefault("name", path.stem)
    if not isinstance(deck.get("tags"), list):
        deck["tags"] = []

    # make sure stats are present
    for card in deck["cards"]:
        _ensure_card_stats(card)

    path.write_text(
        json.dumps(deck, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )


# ------------------------------------------------------------
# Shared tokenization
# ------------------------------------------------------------

def _extract_candidate_words(text: str) -> List[str]:
    """
    Simple tokenizer & filter for candidate vocab words.

    Returns the word tokens of ``text`` in order of first appearance,
    skipping tokens shorter than two characters, tokens containing a digit,
    and case-insensitive duplicates (the first spelling seen is kept).
    """
    words: List[str] = []
    seen = set()
    for token in _WORD_RE.findall(text):
        if len(token) < 2:
            continue
        if any(ch.isdigit() for ch in token):
            continue
        key = token.lower()
        if key in seen:
            continue
        seen.add(key)
        words.append(token)
    return words


# ------------------------------------------------------------
# Shared deck building
# ------------------------------------------------------------

def _build_vocab_cards(
    words: List[str],
    target_lang: str,
    content_type: str,
) -> List[Dict]:
    """
    Translate ``words`` into ``target_lang`` and return one card per usable
    translation.

    Translation is best-effort: a word whose translation fails, comes back
    empty, or equals the word itself (ignoring case and surrounding
    whitespace) produces no card. Failures are logged rather than raised so
    that one bad word does not abort the whole deck.
    """
    translator = GoogleTranslator(source="auto", target=target_lang)
    cards: List[Dict] = []
    failures = 0

    for word in words:
        try:
            translation = translator.translate(word)
        except Exception:
            # Boundary with a third-party network client: any error here
            # only means this word is skipped.
            failures += 1
            logger.debug("Translation failed for %r", word, exc_info=True)
            continue

        if not isinstance(translation, str) or not translation.strip():
            continue
        if translation.strip().lower() == word.strip().lower():
            # Untranslated (or already in the target language): not useful.
            continue

        card = {
            "front": word,
            "back": translation,
            "content_type": content_type,
            "language": target_lang,
        }
        _ensure_card_stats(card)
        cards.append(card)

    if failures:
        logger.warning(
            "%d of %d word(s) could not be translated", failures, len(words)
        )
    return cards


def _deck_path_for(username: str, deck_name: str) -> Path:
    """
    Return the JSON path for ``deck_name`` inside the user's decks directory.

    Path separators in ``deck_name`` are replaced with ``_`` so the name
    cannot point outside the decks directory. Raises ``ValueError`` if the
    resulting path still does not sit directly in that directory.
    """
    decks_dir = _get_decks_dir(username)
    file_stem = _PATH_SEPARATORS_RE.sub("_", deck_name)
    deck_path = decks_dir / f"{file_stem}.json"
    if deck_path.parent != decks_dir:
        raise ValueError(f"Invalid deck name: {deck_name!r}")
    return deck_path


def _write_vocab_deck(
    username: str,
    deck_name: str,
    cards: List[Dict],
    tags: List[str],
) -> Path:
    """Save ``cards`` as the deck ``deck_name`` for ``username``; return its path."""
    deck_path = _deck_path_for(username, deck_name)
    deck = {
        "name": deck_name,
        "cards": cards,
        "tags": tags,
    }
    save_deck(deck_path, deck)
    return deck_path


# ------------------------------------------------------------
# OCR → Flashcards
# ------------------------------------------------------------

def generate_flashcards_from_ocr_results(
    username: str,
    ocr_results: List[Dict],
    deck_name: str = "ocr",
    target_lang: str = "en",
    tags: Optional[List[str]] = None,
) -> Path:
    """
    Build a simple vocab deck from OCR results and return the deck's path.

    ocr_results: list of dict with keys:
      - "text": original text ("raw_text" is used when "text" is missing
        or empty)
      - optionally other fields (ignored)

    The deck is tagged with ``tags`` (no tags when omitted or empty). An
    existing deck file with the same name is overwritten.

    Raises:
        ValueError: if no candidate words are found, if none of them yields
            a usable translation, or if ``deck_name`` does not map to a file
            inside the user's decks directory.
    """
    texts = [
        res.get("text") or res.get("raw_text") or ""
        for res in ocr_results
    ]
    joined = "\n".join(t for t in texts if t)

    words = _extract_candidate_words(joined)
    if not words:
        raise ValueError("No candidate words found in OCR results.")

    cards = _build_vocab_cards(words, target_lang, "ocr_vocab")
    if not cards:
        raise ValueError("No translatable words found to build cards.")

    return _write_vocab_deck(username, deck_name, cards, tags or [])


# ------------------------------------------------------------
# Conversation/Text → Flashcards
# ------------------------------------------------------------

def generate_flashcards_from_text(
    username: str,
    text: str,
    deck_name: str = "conversation",
    target_lang: str = "en",
    tags: Optional[List[str]] = None,
) -> Path:
    """
    Build a vocab deck from raw conversation text and return the deck's path.

    The deck is tagged with ``tags``, or with ``["conversation"]`` when
    ``tags`` is omitted or empty. An existing deck file with the same name
    is overwritten.

    Raises:
        ValueError: if no candidate words are found, if none of them yields
            a usable translation, or if ``deck_name`` does not map to a file
            inside the user's decks directory.
    """
    words = _extract_candidate_words(text)
    if not words:
        raise ValueError("No candidate words found in text.")

    cards = _build_vocab_cards(words, target_lang, "conversation_vocab")
    if not cards:
        raise ValueError("No translatable words found to build cards.")

    return _write_vocab_deck(
        username, deck_name, cards, tags or ["conversation"]
    )