Spaces:

vanderbilt-dsi
/

survey-analytics

Running

App Files Files Community

umangchaudhry committed on Nov 5

Commit

68610da

verified ·

1 Parent(s): 17c6067

Upload 20 files

Browse files

Files changed (17) hide show

crosstab_rag.py +696 -0
crosstab_vectorstores/crosstab_catalog.json +50 -0
prompts/README.md +101 -0
prompts/crosstab_rag_prompt_system.txt +9 -0
prompts/crosstab_rag_prompt_user.txt +7 -0
prompts/questionnaire_rag_prompt.txt +42 -0
prompts/research_brief_prompt.txt +137 -0
prompts/synthesis_prompt_system.txt +10 -0
prompts/synthesis_prompt_user.txt +53 -0
prompts/toplines_rag_prompt.txt +9 -0
prompts/verification_prompt_system.txt +33 -0
prompts/verification_prompt_user.txt +21 -0
questionnaire_rag.py +105 -42
survey_agent.py +688 -363
toplines_rag.py +221 -0
toplines_vectorstores/poll_catalog_toplines.json +18 -0
toplines_vectorstores/toplines_index.json +2290 -0

crosstab_rag.py ADDED Viewed

	@@ -0,0 +1,696 @@

+#!/usr/bin/env python3
+"""
+crosstab_rag.py
+Full Crosstab RAG pipeline:
+ - Parse user query for survey/year/month (topic comes from the optional filters)
+ - Use QuestionnaireRAG to find matching questions (reuses existing vectorstore)
+ - Extract variable names from matched questions
+ - Query Pinecone within the appropriate namespace (survey crosstabs namespace)
+ - Collect all parts for the matched question(s)
+ - Summarize with the LLM, citing the question_id / variable_name of each part
+"""
+import os
+import re
+import argparse
+from typing import List, Dict, Optional, Any
+from pathlib import Path
+from dotenv import load_dotenv
+from langchain_openai import OpenAIEmbeddings, ChatOpenAI
+from langchain.schema import Document
+from langchain_pinecone import PineconeVectorStore
+from pinecone import Pinecone
+# Import QuestionnaireRAG to reuse existing question matching
+from questionnaire_rag import QuestionnaireRAG
+load_dotenv()
+def _load_prompt_file(filename: str) -> str:
+    """Return the UTF-8 text of ``filename`` from the ``prompts`` folder next to this module.
+    Raises:
+        FileNotFoundError: if the prompt file does not exist.
+    """
+    target = Path(__file__).parent.joinpath("prompts", filename)
+    if target.exists():
+        return target.read_text(encoding="utf-8")
+    raise FileNotFoundError(f"Prompt file not found: {target}")
+# -------------------------
+# Config / Environment
+# -------------------------
+# Credentials are read once, at import time, from the process environment.
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+# The crosstab index has its own key variable (PINECONE_API_KEY_CROSSTABS).
+PINECONE_API_KEY = os.getenv("PINECONE_API_KEY_CROSSTABS")
+PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME_CROSSTABS", "crosstab-index")
+# Fail fast: importing this module without both keys raises ValueError.
+if not OPENAI_API_KEY:
+    raise ValueError("OPENAI_API_KEY environment variable not set")
+if not PINECONE_API_KEY:
+    raise ValueError("PINECONE_API_KEY_CROSSTABS environment variable not set")
+# Model names can be overridden through the environment.
+EMBED_MODEL = os.getenv("OPENAI_EMBED_MODEL", "text-embedding-3-small")
+LLM_MODEL = os.getenv("OPENAI_LLM_MODEL", "gpt-4o")
+# Default top_k for the Pinecone queries issued by CrosstabRetriever.
+PINECONE_RETRIEVE_K = 100
+# Upper bound on the number of chunks CrosstabRetriever returns per variable.
+MAX_CROSSTAB_CHUNKS = 50
+# -------------------------
+# Utilities
+# -------------------------
+# Month names in calendar order; when several appear in a query, the one that
+# comes first in the calendar is reported.
+_MONTH_NAMES = ("january", "february", "march", "april", "may", "june",
+                "july", "august", "september", "october", "november", "december")
+# Whole-word recency cues. Whole-word matching keeps "know" or "snow" from
+# being read as "now".
+_RECENCY_PATTERN = re.compile(r"\b(?:recent(?:ly)?|latest|current(?:ly)?|now)\b")
+# Whole-word poll cues, so that "community" is not read as "unity".
+_POLL_PATTERN = re.compile(r"\b(?:vanderbilt|unity)\b")
+def extract_year_month_poll(query: str, latest_month: str = "June", latest_year: str = "2025") -> Dict[str, Optional[str]]:
+    """Pull survey year, month and poll hints out of a free-text query.
+    Args:
+        query: The user's question. ``None`` or an empty string yields no hints.
+        latest_month: Month substituted when the query only says "latest",
+            "recent", "current" or "now" and names no month.
+        latest_year: Year substituted together with ``latest_month`` when the
+            query names no year either.
+    Returns:
+        Dict with keys ``year`` (four-digit string), ``month`` (capitalized
+        month name) and ``poll`` (``"Vanderbilt_Unity_Poll"``); each value is
+        ``None`` when the query gives no hint for it.
+    """
+    out: Dict[str, Optional[str]] = {"year": None, "month": None, "poll": None}
+    q = (query or "").lower()
+    year_match = re.search(r"\b(20\d{2})\b", q)
+    if year_match:
+        out["year"] = year_match.group(1)
+    # Month names must appear as whole words: "maybe" is not May and
+    # "marching" is not March. The modal verb "may" still matches.
+    for name in _MONTH_NAMES:
+        if re.search(rf"\b{name}\b", q):
+            out["month"] = name.capitalize()
+            break
+    # A recency cue only fills the gaps; an explicit year in the query is kept.
+    if out["month"] is None and _RECENCY_PATTERN.search(q):
+        out["month"] = latest_month
+        if out["year"] is None:
+            out["year"] = latest_year
+    if _POLL_PATTERN.search(q):
+        out["poll"] = "Vanderbilt_Unity_Poll"
+    return out
+# -------------------------
+# Pinecone retrieval + assembly
+# -------------------------
+class CrosstabRetriever:
+    """Fetch the crosstab chunks stored in Pinecone for one survey variable.
+    Chunks are selected with an exact metadata filter on ``variable_name``.
+    A client-side scan of the namespace is used only when the filtered query
+    raises.
+    """
+    # Vector sizes assumed when the index does not report its own dimension.
+    _SMALL_EMBED_DIM = 1536
+    _LARGE_EMBED_DIM = 3072
+    def __init__(self,
+                 pinecone_api_key: str = PINECONE_API_KEY,
+                 index_name: str = PINECONE_INDEX_NAME,
+                 embed_model: str = EMBED_MODEL,
+                 openai_api_key: str = OPENAI_API_KEY,
+                 verbose: bool = False):
+        """Create the Pinecone client and the embedding client.
+        Args:
+            pinecone_api_key: API key for the Pinecone project holding the crosstab index.
+            index_name: Name of the Pinecone index to query.
+            embed_model: OpenAI embedding model name.
+            openai_api_key: API key passed to the embedding client.
+            verbose: Whether to print progress and diagnostics.
+        """
+        self.pc = Pinecone(api_key=pinecone_api_key)
+        self.index_name = index_name
+        self.embedder = OpenAIEmbeddings(model=embed_model, openai_api_key=openai_api_key)
+        self.verbose = verbose
+    def _make_vectorstore(self, namespace: str) -> PineconeVectorStore:
+        """Return a LangChain vector store bound to ``namespace`` of the index."""
+        index = self.pc.Index(self.index_name)
+        return PineconeVectorStore(index=index, embedding=self.embedder, namespace=namespace)
+    def _log(self, message: str) -> None:
+        """Print ``message`` only when verbose output is enabled."""
+        if self.verbose:
+            print(message)
+    def _embedding_dimension(self, stats: Any = None) -> int:
+        """Return the vector size to use for placeholder query vectors.
+        The dimension reported by the index statistics is preferred because
+        it is what the index will actually accept. When it is unavailable the
+        size is guessed from the embedding model name.
+        """
+        try:
+            reported = int(stats.get("dimension") or 0)
+        except (AttributeError, TypeError, ValueError):
+            reported = 0
+        if reported > 0:
+            return reported
+        model_name = str(getattr(self.embedder, "model", "")).lower()
+        if "small" not in model_name and "large" in model_name:
+            return self._LARGE_EMBED_DIM
+        return self._SMALL_EMBED_DIM
+    @staticmethod
+    def _document_from_match(match: Any) -> Optional[Document]:
+        """Turn a Pinecone match into a Document, or None when it carries no text.
+        The chunk text is looked up under the ``text`` metadata key first and
+        under ``page_content`` second; the key that supplied the text is
+        removed from the metadata kept on the Document.
+        """
+        metadata = dict(match.metadata or {})
+        content = metadata.pop("text", None) or metadata.pop("page_content", None)
+        if not content:
+            return None
+        return Document(page_content=content, metadata=metadata)
+    @staticmethod
+    def _ordered(docs: List[Document]) -> List[Document]:
+        """Sort chunks by ``chunk_index`` (unindexed chunks last) and cap the count."""
+        ranked = sorted(docs, key=lambda d: d.metadata.get("chunk_index", 999))
+        return ranked[:MAX_CROSSTAB_CHUNKS]
+    def _scan_namespace(self, index: Any, namespace: str, base_variable: str,
+                        variable_with_suffix: str, dummy_vector: List[float], k: int) -> List[Document]:
+        """Fallback: query without a filter and keep the matching chunks client-side.
+        A chunk matches when its ``variable_name`` equals either variable form
+        (case-insensitive), or when its ``question_id`` is the base variable
+        itself or starts with ``<base>_``. Requiring the underscore keeps
+        ``VAND1`` from matching ``VAND15``.
+        """
+        if self.verbose:
+            # Diagnostic only: show which variable names the namespace holds.
+            sample_result = index.query(
+                vector=dummy_vector,
+                top_k=10,
+                namespace=namespace,
+                include_metadata=True
+            )
+            if sample_result.matches:
+                print(f"   📋 Sample variables in namespace:")
+                for sample in sample_result.matches[:5]:
+                    sample_meta = sample.metadata or {}
+                    sample_var = sample_meta.get("variable_name", "N/A")
+                    sample_qid = sample_meta.get("question_id", "N/A")
+                    print(f"      - variable_name: '{sample_var}', question_id: '{sample_qid}'")
+        result = index.query(
+            vector=dummy_vector,
+            top_k=k * 2,  # Over-fetch because the filtering happens afterwards
+            namespace=namespace,
+            include_metadata=True
+        )
+        base_lower = base_variable.lower()
+        wanted_names = {base_lower, variable_with_suffix.lower()}
+        docs = []
+        for match in result.matches:
+            metadata = match.metadata or {}
+            var_name = str(metadata.get("variable_name", ""))
+            question_id = str(metadata.get("question_id", "")).lower()
+            is_wanted = (var_name.lower() in wanted_names or
+                         question_id == base_lower or
+                         question_id.startswith(base_lower + "_"))
+            if not is_wanted:
+                continue
+            doc = self._document_from_match(match)
+            if doc is not None:
+                docs.append(doc)
+            else:
+                self._log(f"   ⚠️  Matched variable '{var_name}' but no content found")
+        docs = self._ordered(docs)
+        self._log(f"   ✅ Fallback found {len(docs)} document(s)")
+        return docs
+    def retrieve_parts_for_variable(self, namespace: str, variable_prefix: str, user_query: Optional[str] = None, k: int = PINECONE_RETRIEVE_K) -> List[Document]:
+        """
+        Retrieve crosstab chunks for a specific variable using direct metadata filtering.
+        The variable name is already known from QuestionnaireRAG, so an exact
+        Pinecone metadata filter is used instead of semantic search.
+        Args:
+            namespace: Pinecone namespace (e.g., "Vanderbilt_Unity_Poll_2025_February_cleaned_data_crosstabs")
+            variable_prefix: Variable name (e.g., "VAND15" or "VAND15_crosstab")
+            user_query: Unused; kept for backward compatibility
+            k: Maximum number of matches requested from Pinecone
+        Returns:
+            Up to MAX_CROSSTAB_CHUNKS Document objects ordered by ``chunk_index``.
+            An empty list is returned when the namespace is missing, the index
+            cannot be reached, or nothing matches; this method does not raise.
+        """
+        try:
+            index = self.pc.Index(self.index_name)
+            stats = index.describe_index_stats()
+            if namespace not in stats.get('namespaces', {}):
+                self._log(f"   ⚠️  Namespace '{namespace}' not found in index '{self.index_name}'")
+                return []
+        except Exception as e:
+            # Best effort by design: callers treat an empty list as "no data".
+            self._log(f"   ❌ Could not inspect index '{self.index_name}': {e}")
+            return []
+        # The CSV filename is like "VAND15_crosstab.csv", so the stored
+        # variable_name may be "VAND15_crosstab" while QuestionnaireRAG
+        # returns "VAND15". Both forms are matched.
+        # NOTE(review): split("_")[0] also truncates variables that contain an
+        # underscore of their own (e.g. "Q5_a" -> "Q5") - confirm this is intended.
+        base_variable = variable_prefix.replace("_crosstab", "").split("_")[0]
+        variable_with_suffix = f"{base_variable}_crosstab"
+        self._log(f"   🔍 Looking for variable: '{base_variable}' or '{variable_with_suffix}' in namespace: '{namespace}'")
+        # Pinecone requires a query vector even when only the metadata filter
+        # matters; with an exact filter the ranking is irrelevant.
+        # NOTE(review): an all-zero vector is assumed to be accepted by the
+        # index's similarity metric - confirm for cosine indexes.
+        dummy_vector = [0.0] * self._embedding_dimension(stats)
+        filter_dict = {
+            "$or": [
+                {"variable_name": {"$eq": base_variable}},
+                {"variable_name": {"$eq": variable_with_suffix}}
+            ]
+        }
+        try:
+            self._log(f"   🔧 Filter: {filter_dict}")
+            result = index.query(
+                vector=dummy_vector,
+                top_k=k,
+                namespace=namespace,
+                filter=filter_dict,
+                include_metadata=True
+            )
+            self._log(f"   📊 Pinecone query returned {len(result.matches)} matches")
+            docs = []
+            for match in result.matches:
+                if self.verbose:
+                    found_meta = match.metadata or {}
+                    found_var = found_meta.get("variable_name", "N/A")
+                    found_qid = found_meta.get("question_id", "N/A")
+                    print(f"   📄 Found: variable_name='{found_var}', question_id='{found_qid}'")
+                doc = self._document_from_match(match)
+                if doc is None:
+                    self._log(f"   ⚠️  No content found for match, skipping")
+                    continue
+                docs.append(doc)
+            self._log(f"   ✅ Successfully loaded {len(docs)} document(s)")
+            return self._ordered(docs)
+        except Exception as e:
+            self._log(f"   ❌ Error with metadata filter: {e}")
+            self._log(f"   🔄 Falling back to manual filtering...")
+        try:
+            return self._scan_namespace(index, namespace, base_variable,
+                                        variable_with_suffix, dummy_vector, k)
+        except Exception as fallback_error:
+            self._log(f"   ❌ Fallback also failed: {fallback_error}")
+            return []
+# -------------------------
+# LLM summarizer
+# -------------------------
+class CrosstabSummarizer:
+    """Turn retrieved crosstab chunks into an LLM-written answer."""
+    def __init__(self, llm_model: str = LLM_MODEL, openai_api_key: str = OPENAI_API_KEY):
+        """Create the chat model client (temperature 0.0)."""
+        self.llm = ChatOpenAI(model=llm_model, openai_api_key=openai_api_key, temperature=0.0)
+    def summarize(self, user_query: str, retrieved_docs: List[Document], question_text: Optional[str] = None, top_n_sources: int = 6) -> Dict:
+        """Summarize ``retrieved_docs`` as an answer to ``user_query``.
+        Args:
+            user_query: The user's question.
+            retrieved_docs: Crosstab chunks to place in the prompt, in order.
+            question_text: Text of the matched survey question. When given, the
+                prompt also asks the model to check that it is relevant.
+            top_n_sources: Maximum number of source ids to return.
+        Returns:
+            Dict with ``answer`` (str) and ``sources`` (list of chunk ids). An
+            LLM failure is reported inside ``answer`` rather than raised.
+        """
+        if not retrieved_docs:
+            return {"answer": "No relevant crosstab data found for that query.", "sources": []}
+        sources = []
+        context_parts = []
+        for part_no, doc in enumerate(retrieved_docs, start=1):
+            meta = doc.metadata or {}
+            # Label each part by question id, then variable name, then position.
+            label = meta.get("question_id") or meta.get("variable_name") or f"part_{part_no}"
+            body = doc.page_content or ""
+            sources.append(label)
+            context_parts.append(f"--- Part {part_no} | {label} ---\n{body}")
+        context_text = "\n\n".join(context_parts)
+        system_prompt = _load_prompt_file("crosstab_rag_prompt_system.txt")
+        if question_text:
+            question_context = f"\n\nSURVEY QUESTION THAT WAS RETRIEVED: {question_text}"
+            relevance_check = (
+                "\n\n⚠️ FIRST: Check if the retrieved question above is actually relevant to the user's question. "
+                "If it's about a different topic (e.g., user asked about 'economy' but question is about 'unity' or 'politics'), "
+                "you MUST state this clearly and NOT provide detailed analysis of irrelevant data."
+            )
+        else:
+            question_context = ""
+            relevance_check = ""
+        user_prompt = _load_prompt_file("crosstab_rag_prompt_user.txt").format(
+            user_query=user_query,
+            question_context=question_context,
+            relevance_check=relevance_check,
+            context_text=context_text
+        )
+        from langchain.schema import HumanMessage, SystemMessage
+        messages = [SystemMessage(content=system_prompt), HumanMessage(content=user_prompt)]
+        try:
+            response = self.llm.invoke(messages)
+            if hasattr(response, 'content'):
+                answer = response.content
+            else:
+                answer = str(response)
+        except Exception as e:
+            answer = f"Error generating summary: {e}"
+        return {"answer": answer.strip(), "sources": sources[:top_n_sources]}
+# -------------------------
+# Orchestration - full pipeline
+# -------------------------
+class CrosstabsRAG:
+    """Orchestrate question matching, crosstab retrieval and summarization.
+    ``query`` returns an LLM-written answer; ``retrieve_raw_data`` returns the
+    retrieved chunks untouched. Both share the same survey resolution and
+    question matching steps, and both report failures as ``{"error": ...}``
+    instead of raising.
+    """
+    def __init__(self, questionnaire_rag: QuestionnaireRAG, verbose: bool = False):
+        """
+        Initialize CrosstabsRAG.
+        Args:
+            questionnaire_rag: Initialized QuestionnaireRAG instance to reuse for question matching
+            verbose: Whether to print detailed logging
+        """
+        self.questionnaire_rag = questionnaire_rag
+        self.verbose = verbose
+        self.retriever = CrosstabRetriever(verbose=verbose)
+        self.summarizer = CrosstabSummarizer()
+    def _log(self, message: str) -> None:
+        """Print ``message`` only when verbose logging is enabled."""
+        if self.verbose:
+            print(message)
+    def _prepare(self, user_query: str, filters: Optional[Dict[str, Any]], raw: bool) -> Dict[str, Any]:
+        """Resolve the survey and match questionnaire questions for a query.
+        Args:
+            user_query: The question to answer.
+            filters: Optional filters (topic, year, month, survey_name).
+            raw: Use ``QuestionnaireRAG.retrieve_raw_data`` when True,
+                ``query_with_metadata`` otherwise.
+        Returns:
+            ``{"error": message}`` on failure, otherwise a dict with the keys
+            ``poll``, ``year``, ``month``, ``namespace`` and ``source_questions``.
+        """
+        filters = filters or {}
+        suffix = " (raw data)" if raw else ""
+        hints = extract_year_month_poll(user_query)
+        year, month, poll = hints.get("year"), hints.get("month"), hints.get("poll")
+        # Filters only fill in what the query text did not provide. Empty or
+        # None filter values are ignored rather than stringified.
+        if not year and filters.get("year"):
+            year = str(filters["year"])
+        if not month and filters.get("month"):
+            month = filters["month"]
+        if isinstance(month, str):
+            # Same capitalized form that extract_year_month_poll produces.
+            month = month.strip().capitalize()
+        if not poll and "survey_name" in filters:
+            poll = "Vanderbilt_Unity_Poll"  # Default mapping
+        if not all([poll, year, month]):
+            missing = []
+            if not poll: missing.append("poll/survey name")
+            if not year: missing.append("year")
+            if not month: missing.append("month")
+            return {"error": f"Could not determine {', '.join(missing)} from query. Please specify in your question."}
+        try:
+            year_number = int(year)
+        except (TypeError, ValueError):
+            return {"error": f"Could not interpret '{year}' as a year. Please specify a four-digit year in your question."}
+        q_filters = {
+            "year": year_number,
+            "month": month,
+            "survey_name": "Vanderbilt Unity Poll"  # Map from poll variable if needed
+        }
+        topic = filters.get("topic")
+        enhanced_query = user_query
+        if filters:
+            self._log(f"   📥 Received filters: {filters}")
+            if topic:
+                q_filters["topic"] = topic
+                self._log(f"   📌 Added topic filter: {topic}")
+                # Put the topic up front when the query does not mention it.
+                if str(topic).lower() not in user_query.lower():
+                    enhanced_query = f"{topic} {user_query}"
+            elif "topic" not in filters:
+                self._log(f"   ⚠️  No 'topic' key in filters dict")
+            else:
+                self._log(f"   ⚠️  Topic filter is empty/None: {topic}")
+        else:
+            self._log(f"   ⚠️  No filters dict provided to CrosstabsRAG.query()")
+        self._log(f"🔍 [CrosstabRAG] Step 1: Querying QuestionnaireRAG vectorstore{suffix}")
+        self._log(f"   Query: {enhanced_query}")
+        self._log(f"   Filters being passed: {q_filters}")
+        try:
+            if raw:
+                search = self.questionnaire_rag.retrieve_raw_data
+            else:
+                search = self.questionnaire_rag.query_with_metadata
+            q_result = search(question=enhanced_query, filters=q_filters, k=10)
+        except Exception as e:
+            return {"error": f"Error querying questionnaire: {e}"}
+        source_questions = (q_result or {}).get("source_questions", [])
+        if not source_questions:
+            return {"error": "No matching questions found in questionnaire for that query."}
+        if self.verbose:
+            print(f"✅ [CrosstabRAG] Step 1 Complete: QuestionnaireRAG matched {len(source_questions)} question(s)")
+            for i, q in enumerate(source_questions[:3], 1):
+                var = q.get("variable_name", "unknown")
+                qtext = q.get("question_text", "")[:80]
+                print(f"   {i}. {var}: {qtext}...")
+        namespace = f"{poll}_{year}_{month}_cleaned_data_crosstabs".replace(" ", "_")
+        return {
+            "poll": poll,
+            "year": year,
+            "month": month,
+            "namespace": namespace,
+            "source_questions": source_questions,
+        }
+    def _collect(self, prepared: Dict[str, Any], user_query: str, raw: bool) -> List[Dict[str, Any]]:
+        """Fetch crosstab chunks for every matched question that has any.
+        Questions without a ``variable_name`` and repeats of a variable that
+        was already handled are skipped, so each variable is retrieved once.
+        Returns:
+            One dict per variable with data, holding ``variable_name``,
+            ``question_text``, ``matched_question`` and ``crosstab_docs``.
+        """
+        suffix = " (raw data)" if raw else ""
+        namespace = prepared["namespace"]
+        entries = []
+        seen = set()
+        for matched_question in prepared["source_questions"]:
+            variable_name = matched_question.get("variable_name")
+            if not variable_name or variable_name in seen:
+                continue
+            seen.add(variable_name)
+            self._log(f"\n🔍 [CrosstabRAG] Step 2: Processing {variable_name}{suffix}")
+            self._log(f"   Namespace: {namespace}")
+            self._log(f"   Variable: {variable_name}")
+            crosstab_docs = self.retriever.retrieve_parts_for_variable(
+                namespace=namespace,
+                variable_prefix=variable_name,
+                user_query=user_query,
+                k=PINECONE_RETRIEVE_K
+            )
+            if not crosstab_docs:
+                self._log(f"   ⚠️  No crosstab data found for {variable_name}")
+                continue
+            self._log(f"   ✅ Retrieved {len(crosstab_docs)} crosstab chunk(s)")
+            entries.append({
+                "variable_name": variable_name,
+                "question_text": matched_question.get("question_text", ""),
+                "matched_question": matched_question,
+                "crosstab_docs": crosstab_docs,
+            })
+        return entries
+    def query(self, user_query: str, filters: Optional[Dict[str, Any]] = None) -> Dict:
+        """
+        Query the crosstab system. Extracts poll, year, and month from the query.
+        Uses QuestionnaireRAG to find matching questions, then retrieves crosstab data
+        and summarizes each matched question with the LLM.
+        Args:
+            user_query: The question to answer
+            filters: Optional filters dict (may include topic, year, month, survey_name)
+        Returns:
+            ``{"error": ...}`` on failure, otherwise a dict with ``answer``,
+            ``sources``, ``matched_variable``, ``matched_variables``,
+            ``matched_question`` (text of the first question that had data),
+            ``namespace_used`` and ``survey_info``.
+        """
+        prepared = self._prepare(user_query, filters, raw=False)
+        if "error" in prepared:
+            return prepared
+        namespace = prepared["namespace"]
+        poll, year, month = prepared["poll"], prepared["year"], prepared["month"]
+        entries = self._collect(prepared, user_query, raw=False)
+        if not entries:
+            total = len(prepared["source_questions"])
+            return {"error": f"No crosstab data found for any of the {total} matched questions in namespace '{namespace}'."}
+        all_question_answers = []
+        all_sources = []
+        matched_variables = []
+        for entry in entries:
+            variable_name = entry["variable_name"]
+            question_text = entry["question_text"]
+            crosstab_docs = entry["crosstab_docs"]
+            if self.verbose:
+                chunk_ids = [str(d.metadata.get("question_id", d.metadata.get("variable_name", "unknown"))) for d in crosstab_docs[:3]]
+                print(f"   Chunk IDs: {', '.join(chunk_ids)}{' ...' if len(crosstab_docs) > 3 else ''}")
+            summary = self.summarizer.summarize(
+                user_query=user_query,
+                retrieved_docs=crosstab_docs,
+                question_text=question_text,
+                top_n_sources=6
+            )
+            # Prefix each summary with the question it belongs to.
+            question_header = f"\n\n--- Question: {variable_name} ---\n{question_text}\n"
+            all_question_answers.append(question_header + summary["answer"].strip())
+            all_sources.extend(summary["sources"])
+            matched_variables.append(variable_name)
+        self._log(f"\n🔍 [CrosstabRAG] Step 3: Combining {len(all_question_answers)} question(s)")
+        citation_block = (
+            f"\n\n---\nSource: {poll.replace('_', ' ')}, {month} {year}\n"
+            f"Questions analyzed: {', '.join(matched_variables)}\n"
+            f"Total questions: {len(matched_variables)}\n"
+        )
+        combined_answer = "\n\n".join(all_question_answers) + citation_block
+        return {
+            "answer": combined_answer,
+            # Deduplicate while keeping first-seen order so the result is stable.
+            "sources": list(dict.fromkeys(all_sources)),
+            "matched_variable": matched_variables[0] if len(matched_variables) == 1 else f"{len(matched_variables)} questions",
+            "matched_variables": matched_variables,
+            "matched_question": entries[0]["question_text"],
+            "namespace_used": namespace,
+            "survey_info": {"poll": poll, "year": year, "month": month}
+        }
+    def retrieve_raw_data(self, user_query: str, filters: Optional[Dict[str, Any]] = None) -> Dict:
+        """
+        Retrieve raw data without LLM summarization.
+        Used by agent framework to get raw data for synthesis.
+        Args:
+            user_query: The question to answer
+            filters: Optional filters dict (may include topic, year, month, survey_name)
+        Returns:
+            ``{"error": ...}`` on failure, otherwise a dict with
+            crosstab_docs_by_variable, matched_questions, matched_variables,
+            namespace_used, survey_info
+        """
+        prepared = self._prepare(user_query, filters, raw=True)
+        if "error" in prepared:
+            return prepared
+        namespace = prepared["namespace"]
+        source_questions = prepared["source_questions"]
+        entries = self._collect(prepared, user_query, raw=True)
+        if not entries:
+            return {"error": f"No crosstab data found for any of the {len(source_questions)} matched questions in namespace '{namespace}'."}
+        crosstab_docs_by_variable = {}
+        matched_variables = []
+        for entry in entries:
+            # Store raw documents without summarization.
+            crosstab_docs_by_variable[entry["variable_name"]] = {
+                "crosstab_docs": entry["crosstab_docs"],
+                "question_text": entry["question_text"],
+                "matched_question": entry["matched_question"]
+            }
+            matched_variables.append(entry["variable_name"])
+        self._log(f"\n✅ [CrosstabRAG] Step 2 Complete: Retrieved raw data for {len(matched_variables)} question(s)")
+        return {
+            "crosstab_docs_by_variable": crosstab_docs_by_variable,
+            "matched_questions": source_questions,
+            "matched_variables": matched_variables,
+            "namespace_used": namespace,
+            "survey_info": {"poll": prepared["poll"], "year": prepared["year"], "month": prepared["month"]}
+        }
+# -------------------------
+# CLI / Interactive
+# -------------------------
+def _print_result(out: Dict) -> None:
+    """Print one CrosstabsRAG.query() result: the error, or the matched question and answer."""
+    if "error" in out:
+        print(f"Error: {out['error']}")
+        return
+    matched_question = out.get("matched_question", "")
+    if matched_question:
+        print(f"\nSURVEY QUESTION:\n{matched_question}\n")
+    print("ANSWER:\n", out["answer"])
+def main():
+    """Command-line entry point: answer one ``--query`` or run an interactive loop.
+    The interactive loop ends on an empty line, "quit", "exit", Ctrl-C, or
+    end of input (Ctrl-D / closed stdin).
+    """
+    parser = argparse.ArgumentParser(description="Crosstab RAG CLI - query survey crosstabs.")
+    parser.add_argument("--query", "-q", help="Question to ask (if omitted, interactive).", default=None)
+    args = parser.parse_args()
+    # QuestionnaireRAG is built first because CrosstabsRAG reuses it for
+    # question matching. It reads PINECONE_API_KEY, not the crosstab key.
+    openai_api_key = os.getenv("OPENAI_API_KEY")
+    pinecone_api_key = os.getenv("PINECONE_API_KEY")
+    if not openai_api_key or not pinecone_api_key:
+        print("Error: Missing API keys")
+        print("Set OPENAI_API_KEY and PINECONE_API_KEY environment variables")
+        return
+    questionnaire_rag = QuestionnaireRAG(
+        openai_api_key=openai_api_key,
+        pinecone_api_key=pinecone_api_key,
+        persist_directory="./questionnaire_vectorstores",
+        verbose=False
+    )
+    system = CrosstabsRAG(questionnaire_rag=questionnaire_rag)
+    if args.query:
+        _print_result(system.query(args.query))
+        return
+    print("Interactive Crosstab RAG\nType 'quit' to stop.")
+    while True:
+        try:
+            q = input("\nYour question: ").strip()
+            if not q or q.lower() in ("quit", "exit"):
+                break
+            _print_result(system.query(q))
+        except (KeyboardInterrupt, EOFError):
+            # input() raises EOFError when stdin is closed; exit quietly.
+            break
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()

crosstab_vectorstores/crosstab_catalog.json ADDED Viewed

	@@ -0,0 +1,50 @@

+{
+  "Vanderbilt_Unity_Poll_2023_June_cleaned_data_crosstabs": {
+    "num_questions": 18,
+    "num_chunks": 61,
+    "path": "crosstabs/Vanderbilt_Unity_Poll_2023_June_cleaned_data_crosstabs",
+    "has_questionnaire": true
+  },
+  "Vanderbilt_Unity_Poll_2023_March_cleaned_data_crosstabs": {
+    "num_questions": 9,
+    "num_chunks": 33,
+    "path": "crosstabs/Vanderbilt_Unity_Poll_2023_March_cleaned_data_crosstabs",
+    "has_questionnaire": true
+  },
+  "Vanderbilt_Unity_Poll_2023_September_cleaned_data_crosstabs": {
+    "num_questions": 15,
+    "num_chunks": 54,
+    "path": "crosstabs/Vanderbilt_Unity_Poll_2023_September_cleaned_data_crosstabs",
+    "has_questionnaire": true
+  },
+  "Vanderbilt_Unity_Poll_2024_March_cleaned_data_crosstabs": {
+    "num_questions": 26,
+    "num_chunks": 86,
+    "path": "crosstabs/Vanderbilt_Unity_Poll_2024_March_cleaned_data_crosstabs",
+    "has_questionnaire": true
+  },
+  "Vanderbilt_Unity_Poll_2024_October_cleaned_data_crosstabs": {
+    "num_questions": 20,
+    "num_chunks": 70,
+    "path": "crosstabs/Vanderbilt_Unity_Poll_2024_October_cleaned_data_crosstabs",
+    "has_questionnaire": true
+  },
+  "Vanderbilt_Unity_Poll_2024_September_cleaned_data_crosstabs": {
+    "num_questions": 19,
+    "num_chunks": 61,
+    "path": "crosstabs/Vanderbilt_Unity_Poll_2024_September_cleaned_data_crosstabs",
+    "has_questionnaire": true
+  },
+  "Vanderbilt_Unity_Poll_2025_February_cleaned_data_crosstabs": {
+    "num_questions": 29,
+    "num_chunks": 122,
+    "path": "crosstabs/Vanderbilt_Unity_Poll_2025_February_cleaned_data_crosstabs",
+    "has_questionnaire": true
+  },
+  "Vanderbilt_Unity_Poll_2025_June_cleaned_data_crosstabs": {
+    "num_questions": 30,
+    "num_chunks": 106,
+    "path": "crosstabs/Vanderbilt_Unity_Poll_2025_June_cleaned_data_crosstabs",
+    "has_questionnaire": true
+  }
+}

prompts/README.md ADDED Viewed

	@@ -0,0 +1,101 @@

+# Prompts Directory
+This directory contains all the prompts used by the survey agent and RAG pipelines. Prompts are organized by component and purpose for easy maintenance and updates.
+## Survey Agent Prompts
+### `research_brief_prompt.txt`
+**Purpose**: Used by the research brief generator to plan multi-stage research queries.
+**Used in**: `survey_agent.py` → `_generate_research_brief()`
+**Dynamic Variables**:
+- `{available_pipelines}` - Status of available data pipelines
+- `{available_surveys}` - List of available survey names
+- `{available_months}` - List of available months by year
+- `{verification_context}` - Context from previous verification failures (if retrying)
+### `verification_prompt_system.txt`
+**Purpose**: System message for the verification step that checks if retrieved data matches the user's question.
+**Used in**: `survey_agent.py` → `_verify_results()`
+### `verification_prompt_user.txt`
+**Purpose**: User message template for verification step with actual data details.
+**Used in**: `survey_agent.py` → `_verify_results()`
+**Dynamic Variables**:
+- `{question}` - The user's question
+- `{filters_applied}` - Filters applied during retrieval
+- `{retrieval_summary}` - Summary of retrieved data
+- `{raw_data_details}` - Detailed raw data structure
+### `synthesis_prompt_system.txt`
+**Purpose**: System message for the final synthesis step that combines all research results.
+**Used in**: `survey_agent.py` → `_synthesize_response()`
+### `synthesis_prompt_user.txt`
+**Purpose**: User message template for synthesis with all retrieved data.
+**Used in**: `survey_agent.py` → `_synthesize_response()`
+**Dynamic Variables**:
+- `{stage_count}` - "multiple stages" or "the research"
+- `{full_question}` - The user's question
+- `{reasoning}` - Research plan reasoning
+- `{context_parts}` - All retrieved raw data formatted
+- `{unavailable_note}` - Note about unavailable pipelines (if any)
+## RAG Pipeline Prompts
+### `questionnaire_rag_prompt.txt`
+**Purpose**: System prompt for questionnaire RAG that answers questions about survey questions.
+**Used in**: `questionnaire_rag.py` → `_get_prompt()`
+**Dynamic Variables** (handled by LangChain):
+- `{catalog}` - Available polls summary
+- `{context}` - Retrieved question context
+- `{question}` - User's question
+### `crosstab_rag_prompt_system.txt`
+**Purpose**: System message for crosstab RAG that analyzes cross-tabulation data.
+**Used in**: `crosstab_rag.py` → `CrosstabSummarizer.summarize()`
+### `crosstab_rag_prompt_user.txt`
+**Purpose**: User message template for crosstab analysis.
+**Used in**: `crosstab_rag.py` → `CrosstabSummarizer.summarize()`
+**Dynamic Variables**:
+- `{user_query}` - The user's question
+- `{question_context}` - Retrieved survey question text (if available)
+- `{relevance_check}` - Instructions for relevance checking (if question available)
+- `{context_text}` - Formatted crosstab data chunks
+### `toplines_rag_prompt.txt`
+**Purpose**: Prompt for toplines RAG that analyzes response frequencies.
+**Used in**: `toplines_rag.py` → `_synthesize_answer()`
+**Dynamic Variables**:
+- `{query}` - The user's question
+- `{context_snippets}` - Formatted topline document snippets
+## Updating Prompts
+To update a prompt:
+1. Edit the corresponding `.txt` file in this directory
+2. Restart the agent/RAG system to load the new prompt
+3. No code changes needed - prompts are loaded dynamically at runtime
+## Notes
+- All prompts use Python string formatting (`{variable}` syntax)
+- Dynamic variables are filled in at runtime by the calling code
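+- The prompt loader (`_load_prompt_file()`) does no caching: every call reads the file from disk. Whether an edit is picked up without a restart depends on when the calling code loads the prompt, so restart to be safe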
+- If a prompt file is missing, the system will raise a `FileNotFoundError` with a clear message

prompts/crosstab_rag_prompt_system.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+You are a data analyst assistant specialized in interpreting survey crosstab tables.
+🚨 CRITICAL: Before answering, check if the retrieved question actually matches the user's query.
+- If the question is about a DIFFERENT topic than what the user asked, you MUST explicitly state this.
+- Do NOT provide detailed analysis of irrelevant data - instead clearly explain that the retrieved question doesn't match.
+- Only provide detailed analysis if the question is relevant to the user's query.
+Follow the structure and provide clear, specific answers based only on the context provided.

prompts/crosstab_rag_prompt_user.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+User question: {user_query}{question_context}{relevance_check}
+Context (crosstab parts):
+{context_text}
+Answer the question based only on the context above. If the retrieved question doesn't match the user's query, explicitly state this.

prompts/questionnaire_rag_prompt.txt ADDED Viewed

	@@ -0,0 +1,42 @@

+You are an expert assistant for analyzing poll questionnaires.
+🚨 CRITICAL RULES - NEVER VIOLATE THESE:
+1. **ONLY use information from the provided context**
+   - Do NOT make up questions, polls, or dates
+   - Do NOT assume a poll exists if it's not in the context
+   - If information is missing, say "I don't have data for [X]" rather than making it up
+2. **Verify data exists before listing it**
+   - Before mentioning any poll, check it's actually in the context
+   - Before listing questions, confirm they exist in the retrieved data
+   - If asked about multiple time periods, explicitly state which ones have data and which don't
+3. **Be explicit about what's NOT in the data**
+   - If asked about "2024 and 2025" but only 2025 data exists, say: "I have data for 2025, but there is no 2024 data in the retrieved results"
+   - Never silently skip missing data - always acknowledge it
+4. **When listing questions:**
+   - List ALL questions from the context in order
+   - Include full question text and response options
+   - Note sampling inline in clear language:
+     * "Asked to all respondents" (not "ASK ALL")
+     * "Asked to half the sample" (not "HALFSAMP1=1")
+     * "Asked only if [condition]" (not technical codes)
+   - If sibling variants exist, note "One of two versions shown to different groups"
+   - Always cite which poll(s) you're using
+5. **Format for scannability:**
+   - Use numbered lists for questions
+   - Bold question text
+   - Include response options as bullet points
+   - Put sampling info in parentheses after question
+Available polls in the system (for reference):
+{catalog}
+Context (ONLY source of truth):
+{context}
+Question: {question}

prompts/research_brief_prompt.txt ADDED Viewed

	@@ -0,0 +1,137 @@

+You are a research planning expert for survey data analysis.
+# TODO: REMOVE WHEN PIPELINES READY - Use dynamic status
+Available data sources:
+{available_pipelines}
+# TODO: REMOVE WHEN PIPELINES READY - START
+⚠️ IMPORTANT: Currently questionnaire, toplines, and crosstabs pipelines are available.
+- SQL pipeline is NOT yet available
+- If the user asks for raw data analysis requiring SQL, use action="followup" to inform them
+- You CAN use toplines and crosstabs for response frequencies and cross-tabulations
+# TODO: REMOVE WHEN PIPELINES READY - END
+{available_surveys}
+{available_months}
+You have FOUR possible actions:
+**1. followup** - Ask clarifying question if ambiguous OR if user asks for unavailable data
+**2. answer** - Answer directly without data (system questions, general knowledge)
+**3. route_to_sources** - Simple query that can be answered with one-shot data retrieval
+   Use this for MOST queries including:
+   - "What questions were asked in June 2025?" ← Use this with QUESTIONNAIRE pipeline
+   - "Show me all healthcare questions" ← Use this with QUESTIONNAIRE pipeline
+   - "What was Trump's approval in June 2025?" ← Use this with TOPLINES pipeline (approval = response data)
+   - "What about June 2025?" (when June 2022 was discussed before) ← Use this for the NEW date only
+   - ANY query asking about a SINGLE time period or survey ← Use this
+   CRITICAL PIPELINE SELECTION:
+   - Use QUESTIONNAIRE when user asks: "what questions", "list questions", "show questions"
+   - Use TOPLINES when user asks: "approval", "ratings", "percentages", "how many", "what %", "response frequencies"
+   - Use CROSSTABS when user asks: "vary by", "breakdown by", "by gender/age/race/etc", "differences by"
+   CRITICAL: When user mentions a new time period, retrieve ONLY that period.
+   Do NOT create comparative queries unless explicitly requested!
+**4. execute_stages** - Complex query requiring SEQUENTIAL staged research
+   Use this ONLY for:
+   - Explicit comparisons: "compare 2024 vs 2025", "what changed between surveys"
+   - Queries with "most/least/best/worst" needing analysis across multiple retrievals
+   - Queries explicitly asking for differences or changes
+   DO NOT use stages for simple follow-up questions about different time periods!
+   # TODO: REMOVE WHEN PIPELINES READY - START
+   NOTE: SQL pipeline isn't available yet, but toplines and crosstabs are available for analysis
+   # TODO: REMOVE WHEN PIPELINES READY - END
+CRITICAL RULES FOR CONVERSATION CONTEXT:
+- When user says "what about [X]?" they're asking a NEW question about X
+- Do NOT assume they want to compare with previous topics
+- "What about June 2025?" means "show me June 2025" (NOT "compare with previous time period")
+- Only create multi-stage queries when user EXPLICITLY asks to compare
+🚨 HANDLING REFERENCE PHRASES (e.g., "these questions", "for each", "all of them"):
+- When user references previous results (e.g., "for each of these questions", "how do responses vary for these"),
+  you MUST infer the context from conversation history:
+  - Extract time periods (year, month) from previous messages
+  - If previous answer showed questions from February and June 2025, use those months
+  - If previous answer listed multiple questions, create stages for each question OR each month (depending on query)
+  - Example: "how do responses vary by gender for each of these questions?"
+    → If previous answer showed economy questions from Feb and June 2025, create stages for each month
+    → Use action="execute_stages" with stage per month, query for economy questions by gender
+- DO NOT ask followup for month/year if you can infer it from conversation history
+FILTERING RULES:
+- Extract survey name from user query and map to exact stored name
+- "Unity Poll" → "Vanderbilt_Unity_Poll"
+- Be precise with year and month extraction
+CRITICAL: HANDLING YEAR-ONLY QUERIES (no month specified):
+- If user provides ONLY a year (e.g., "in 2025", "for 2025") and asks for crosstabs or toplines:
+  → These pipelines REQUIRE a month.
+  **ALWAYS ASK FOLLOWUP** - Do NOT assume they want the entire year:
+     - action="followup"
+     - followup_question="Which month(s) in 2025 would you like to see? Available months: [list months from the available polls description above]"
+  **ONLY create multiple stages if user EXPLICITLY asks for year-wide/all months:**
+     - Examples: "across all of 2025", "for the entire year 2025", "all months in 2025"
+     - Then: action="execute_stages" with one stage per available month
+- If user provides year+month → use single-stage (route_to_sources)
+- If user provides ONLY year + questionnaire pipeline → can query by year only (questionnaire supports year-only filters)
+- If user provides ONLY year + crosstabs/toplines → MUST ask followup (unless user explicitly requests all months/year-wide)
+{verification_context}
+Examples:
+User: "what questions were asked in June 2025?"
+Brief: action=route_to_sources, retrieve June 2025 questions
+User: "what about June 2025?" (after discussing June 2022)
+Brief: action=route_to_sources, retrieve June 2025 questions ← NOT staged!
+User: "compare June 2024 vs June 2025"
+Brief: action=execute_stages, stage 1: 2024, stage 2: 2025 ← This needs stages
+User: "june 2022 unity poll"
+Brief: action=route_to_sources, year=2022, month=June, survey_name='Vanderbilt_Unity_Poll'
+User: "How do responses vary by gender in 2025?" (NO MONTH, asking for crosstabs)
+Brief: action=followup, followup_question="Which month(s) in 2025 would you like to see? Available months: February, June"
+DO NOT create multiple stages unless user explicitly asks for "all months" or "entire year"
+User: "How do responses vary by gender across all of 2025?" (EXPLICIT year-wide request)
+Brief: action=execute_stages, stage 1: year=2025, month=February, stage 2: year=2025, month=June
+User: "What questions were asked in 2025?" (NO MONTH, asking for questionnaire)
+Brief: action=route_to_sources, year=2025 (questionnaire supports year-only filters)
+User: "what questions were asked about the economy in 2025?" (first message)
+Brief: action=route_to_sources, year=2025, topic='economy'
+User: "how do the responses to questions about the economy vary by gender for each of these questions?" (follow-up, referencing previous)
+Brief: action=execute_stages
+  - Previous question: "what questions about the economy in 2025?"
+  - Infer: User asked about 2025, so use ALL available months for 2025 (February and June)
+  - Stage 1: year=2025, month=February, crosstabs for economy questions by gender
+  - Stage 2: year=2025, month=June, crosstabs for economy questions by gender
+  - DO NOT ask followup - infer months from previous question's year (2025)
+User: "what was trump's approval in 2025?" (asks followup for month)
+Brief: action=followup, followup_question="Which month(s) in 2025 would you like to see Trump's approval ratings for? Available months: February, June"
+User: "June" (short answer to followup)
+Brief: action=route_to_sources
+  - Previous question: "what was trump's approval in 2025?"
+  - Combine: "Trump's approval in June 2025"
+  - Use TOPLINES pipeline (original question asked about approval/ratings)
+  - Filters: year=2025, month=June, topic/query about Trump approval
+  - DO NOT use questionnaire pipeline - user wants approval DATA, not questions

prompts/synthesis_prompt_system.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+You are a survey data analyst synthesizing research results. Your primary responsibilities are:
+1. **Extract and present ACTUAL DATA VALUES**: When the user asks about responses, percentages, or breakdowns, you MUST extract and present the actual numbers, percentages, and counts from the raw data. DO NOT provide generic descriptions like "responses are broken down by gender" - instead say "Male: 45% approve, 30% disapprove. Female: 35% approve, 40% disapprove" with the actual numbers from the data.
+2. **Relevance check**: Only synthesize data that is actually relevant to the user's question. If retrieved data doesn't match the question, explicitly state this and avoid providing irrelevant analysis.
+3. **Data accuracy**: Use only the data provided in the context. Extract specific numbers, percentages, and values. Present them clearly and accurately.
+4. **For crosstabs questions**: When asked about variations by demographics, extract the actual percentages/numbers for each demographic group from the crosstab chunks and present them in a clear, organized format.

prompts/synthesis_prompt_user.txt ADDED Viewed

	@@ -0,0 +1,53 @@

+Synthesize raw data from {stage_count} to answer the user's question.
+User question: {full_question}
+Research plan: {reasoning}
+Retrieved raw data:
+{context_parts}
+{unavailable_note}
+🚨 CRITICAL INSTRUCTIONS:
+1. **RELEVANCE CHECK FIRST**: For each stage's data, check if it actually answers the user's question.
+   - If a stage retrieved data about a DIFFERENT topic than what the user asked, explicitly state this.
+   - Example: If user asked about "economy" but a stage returned data about "unity" or "political topics", clearly state that this stage did not find relevant data.
+   - Do NOT provide detailed analysis of irrelevant data - instead explain what was found and why it doesn't match.
+2. **HANDLE DIFFERENT DATA TYPES - EXTRACT ACTUAL NUMBERS**:
+   - **QUESTIONNAIRE DATA**: Format questions clearly with question text, response options, and topics. Use this when user asks "what questions were asked?"
+   - **TOPLINES DATA**: Present response frequencies and percentages clearly with EXACT NUMBERS. Use this when user asks about approval ratings, percentages, or response frequencies.
+   - **CROSSTABS DATA**: CRITICAL - Extract and present the ACTUAL NUMBERS, PERCENTAGES, and BREAKDOWNS from the crosstab data.
+     * When user asks "how do responses vary by gender/age/etc.", you MUST extract and present the actual percentages for each demographic group
+     * Example: "Male: 45% approve, 30% disapprove. Female: 35% approve, 40% disapprove."
+     * DO NOT just say "responses are broken down by gender" - you MUST include the actual numbers
+     * Extract percentages, counts, and breakdowns from the crosstab chunks provided
+     * Present the data in a clear, organized format showing the actual variation by the requested demographic
+   - Combine data types appropriately when multiple types are available.
+3. **ONLY SYNTHESIZE RELEVANT DATA**:
+   - Focus your answer on stages that actually addressed the user's question.
+   - For irrelevant stages, briefly acknowledge them and explain why they don't help answer the question.
+4. **Answer Structure - INCLUDE ACTUAL DATA**:
+   - If this is a comparative query, clearly organize by the comparison dimensions WITH ACTUAL NUMBERS
+   - If this is an analytical query (most/least/best/worst), perform the analysis USING THE ACTUAL DATA VALUES
+   - Preserve important details from RELEVANT research
+   - Use natural language, be clear and organized
+   - Cite which poll(s), survey dates, or stage(s) information comes from
+   - Format numbers and percentages clearly - ALWAYS include the actual values from the data
+   - For crosstabs: Extract and present the actual breakdown percentages/numbers for each demographic group
+   - DO NOT provide generic descriptions - provide specific numbers, percentages, and data points from the retrieved data
+5. **When No Relevant Data Found**:
+   - If multiple stages returned irrelevant data, clearly state: "The retrieved data does not match your question about [topic]. The available data is about [different topics found]. I cannot provide an answer to your specific question with the current data."
+   - Do NOT synthesize irrelevant information just because it was retrieved.
+6. **Data Integrity**:
+   - Do NOT make up information not in the retrieved raw data
+   - Use only the data provided in the context above
+   - If percentages or numbers are provided, use them accurately
+   - TODO: REMOVE WHEN PIPELINES READY - If some data sources weren't available, clearly state this and explain what you CAN provide

prompts/toplines_rag_prompt.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+You are a polling analyst. Use the following Vanderbilt Unity Poll topline data to answer the question accurately.
+Question: {query}
+Context:
+{context_snippets}
+Write a concise, factual summary of the topline results, referencing the poll name and date. If the question is not answerable from this data, politely state that the system only has Vanderbilt Unity Poll data.

prompts/verification_prompt_system.txt ADDED Viewed

	@@ -0,0 +1,33 @@

+You are a verification expert. Your ONLY job is to check if the retrieved RAW DATA/Questions matches what the user asked for.
+CRITICAL RULES:
+1. **Match the question literally** - Don't add requirements the user didn't ask for
+   - If they asked "what questions were asked?" and we retrieved questions → SUCCESS
+   - If they asked "what are the results/percentages?" and we only have questions → FAILURE
+   - If they asked "how do responses vary by X?" and we have crosstabs → SUCCESS
+2. **Examine the raw data structure**:
+   - For questionnaire: Check if source_questions exist and match the query topic/intent
+   - For toplines: Check if retrieved_docs exist and contain relevant response data
+   - For crosstabs: Check if crosstab_docs_by_variable exist for the requested breakdown
+3. **Only fail if there's an actual problem**:
+   - We retrieved the wrong type of data (e.g., questions when they asked for results)
+   - We retrieved from the wrong time period/survey (AND this is confirmed by mismatched filters AND raw data time period)
+   - The retrieved data doesn't match the query topic (e.g., asked about "economy" but got "politics")
+   - We have no data when the user asked for specific data
+4. **Do NOT fail if**:
+   - User asked for questions and we got questions (even if we don't have "analysis")
+   - User asked for data from June 2025 and filters show year=2025, month=June AND raw data shows the same period → APPROVE
+   - The time period in the raw data matches the filters applied → TRUST the filtering system worked correctly
+   - The data seems sufficient to answer their actual question
+   - We have at least some relevant data (even if not perfect)
+5. **CRITICAL: Time Period Verification**:
+   - If filters were applied (year=2025, month=June) AND the raw data details show "Time period: June 2025" → APPROVE
+   - The retrieval system filters data BEFORE returning it, so if filters match the time period shown, trust it
+   - Only fail if there's a CLEAR mismatch (e.g., filters say June 2025 but raw data shows March 2024)
+Be practical, not pedantic. If the retrieved raw data can answer what they asked, approve it.

prompts/verification_prompt_user.txt ADDED Viewed

	@@ -0,0 +1,21 @@

+User question: "{question}"
+Filters applied during retrieval:
+{filters_applied}
+Summary of retrieved data:
+{retrieval_summary}
+Detailed raw data structure:
+{raw_data_details}
+Question: Can we answer the user's question with this raw data?
+- Consider the TYPE of data (questions vs results vs crosstabs)
+- Consider the TOPIC relevance (does the data match what they asked about?)
+- Consider the TIME PERIOD (is it from the right survey/month?) - Check both the filters applied AND the time period shown in the raw data details
+- Consider the QUANTITY (do we have at least some data?)
+IMPORTANT: If filters were applied (e.g., year=2025, month=June) AND the raw data details show the same time period, trust that the time period is correct. The retrieval system filters data before returning it.
+Answer YES only if the raw data is sufficient to answer their question. Answer NO if data is missing, wrong type, wrong topic, or wrong time period.

questionnaire_rag.py CHANGED Viewed

@@ -27,6 +27,15 @@ except ImportError:
     pass
 class QuestionnaireRAG:
     """
     Improved questionnaire RAG with:
@@ -267,6 +276,28 @@ class QuestionnaireRAG:
                         print(f"⚠️  Filtered out: wrong survey {metadata.get('survey_name')} != {matched_name}")
                     valid = False
             if valid:
                 validated_docs.append(doc)
@@ -274,49 +305,9 @@ class QuestionnaireRAG:
     def _get_prompt(self) -> ChatPromptTemplate:
         """Get the improved system prompt with anti-hallucination measures"""
         return ChatPromptTemplate.from_messages([
-            ("system", """You are an expert assistant for analyzing poll questionnaires.
-🚨 CRITICAL RULES - NEVER VIOLATE THESE:
-1. **ONLY use information from the provided context**
-   - Do NOT make up questions, polls, or dates
-   - Do NOT assume a poll exists if it's not in the context
-   - If information is missing, say "I don't have data for [X]" rather than making it up
-2. **Verify data exists before listing it**
-   - Before mentioning any poll, check it's actually in the context
-   - Before listing questions, confirm they exist in the retrieved data
-   - If asked about multiple time periods, explicitly state which ones have data and which don't
-3. **Be explicit about what's NOT in the data**
-   - If asked about "2024 and 2025" but only 2025 data exists, say: "I have data for 2025, but there is no 2024 data in the retrieved results"
-   - Never silently skip missing data - always acknowledge it
-4. **When listing questions:**
-   - List ALL questions from the context in order
-   - Include full question text and response options
-   - Note sampling inline in clear language:
-     * "Asked to all respondents" (not "ASK ALL")
-     * "Asked to half the sample" (not "HALFSAMP1=1")
-     * "Asked only if [condition]" (not technical codes)
-   - If sibling variants exist, note "One of two versions shown to different groups"
-   - Always cite which poll(s) you're using
-5. **Format for scannability:**
-   - Use numbered lists for questions
-   - Bold question text
-   - Include response options as bullet points
-   - Put sampling info in parentheses after question
-Available polls in the system (for reference):
-{catalog}
-Context (ONLY source of truth):
-{context}
-Question: {question}
-"""),
             ("human", "Answer:")
         ])
@@ -349,6 +340,78 @@ Question: {question}
         """
         return self._query_internal(question, filters, k)
     def _query_internal(
         self,
         question: str,

     pass
+def _load_prompt_file(filename: str) -> str:
+    """Load a prompt file from the prompts directory.
+
+    The file is looked up in the ``prompts`` folder located next to this
+    module and is read as UTF-8 text on every call (nothing is cached here).
+
+    Args:
+        filename: Name of the prompt file inside ``prompts``,
+            e.g. ``"questionnaire_rag_prompt.txt"``.
+
+    Returns:
+        The full text of the prompt file.
+
+    Raises:
+        FileNotFoundError: If the file does not exist in ``prompts``.
+    """
+    prompt_dir = Path(__file__).parent / "prompts"
+    prompt_path = prompt_dir / filename
+    if not prompt_path.exists():
+        raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
+    return prompt_path.read_text(encoding="utf-8")
 class QuestionnaireRAG:
     """
     Improved questionnaire RAG with:
                         print(f"⚠️  Filtered out: wrong survey {metadata.get('survey_name')} != {matched_name}")
                     valid = False
+            # Check topic (if topic filter is provided)
+            if "topic" in filters and valid:
+                expected_topic = filters["topic"].lower()
+                # Topics are stored as comma-separated string in metadata
+                doc_topics = metadata.get("topics", "")
+                if isinstance(doc_topics, str):
+                    doc_topics_list = [t.strip().lower() for t in doc_topics.split(",")]
+                elif isinstance(doc_topics, list):
+                    doc_topics_list = [str(t).strip().lower() for t in doc_topics]
+                else:
+                    doc_topics_list = []
+                if self.verbose and valid:
+                    var_name = metadata.get("variable_name", "unknown")
+                    print(f"   🔍 Checking topic '{expected_topic}' for {var_name}: doc_topics={doc_topics_list}")
+                if expected_topic not in doc_topics_list:
+                    if self.verbose:
+                        var_name = metadata.get("variable_name", "unknown")
+                        print(f"⚠️  Filtered out {var_name}: topic '{expected_topic}' not in {doc_topics_list}")
+                    valid = False
             if valid:
                 validated_docs.append(doc)
     def _get_prompt(self) -> ChatPromptTemplate:
         """Get the improved system prompt with anti-hallucination measures"""
+        system_prompt_template = _load_prompt_file("questionnaire_rag_prompt.txt")
         return ChatPromptTemplate.from_messages([
+            ("system", system_prompt_template),
             ("human", "Answer:")
         ])
         """
         return self._query_internal(question, filters, k)
+    def retrieve_raw_data(
+        self,
+        question: str,
+        filters: Optional[Dict[str, Any]] = None,
+        k: int = 20
+    ) -> Dict[str, Any]:
+        """
+        Retrieve raw data without LLM formatting.
+        Used by agent framework to get raw data for synthesis.
+
+        Args:
+            question: Query text passed to the vectorstore retriever.
+            filters: Optional filter dict. It is converted to a Pinecone
+                filter via ``_build_pinecone_filter`` and, when non-empty,
+                also used to post-validate the retrieved documents via
+                ``_validate_results``.
+            k: Number of documents requested from the retriever.
+
+        Returns:
+            Dict with:
+            - 'source_questions': full question records looked up in
+              ``self.questions_by_id``, de-duplicated by question id and
+              sorted by (poll_date, position)
+            - 'num_sources': number of entries in 'source_questions'
+            - 'filters_applied': the filters passed in (``{}`` if none)
+            - 'retrieved_docs': the retrieved documents after validation
+              (empty list when nothing survived)
+        """
+        if self.verbose:
+            print(f"\n📊 [Raw Data] Query: {question}")
+            if filters:
+                print(f"🔍 Filters: {filters}")
+        # Build Pinecone filter
+        pinecone_filter = self._build_pinecone_filter(filters or {})
+        # Retrieve documents (only pass a filter to the retriever when one was built)
+        if pinecone_filter:
+            if self.verbose:
+                print(f"🔧 Pinecone filter: {pinecone_filter}")
+            retriever = self.vectorstore.as_retriever(
+                search_kwargs={"k": k, "filter": pinecone_filter}
+            )
+        else:
+            retriever = self.vectorstore.as_retriever(search_kwargs={"k": k})
+        docs = retriever.invoke(question)
+        if self.verbose:
+            print(f"📥 Retrieved {len(docs)} documents from Pinecone")
+        # Validate results match filters
+        if filters:
+            docs = self._validate_results(docs, filters)
+            if self.verbose:
+                print(f"✅ After validation: {len(docs)} documents")
+        # Check if we have any results; return an empty result with the same keys
+        if not docs:
+            return {
+                "source_questions": [],
+                "num_sources": 0,
+                "filters_applied": filters or {},
+                "retrieved_docs": []
+            }
+        # Reconstruct full questions: one record per distinct question_id.
+        # Docs without a question_id, or whose id is not in questions_by_id, are skipped.
+        full_questions = []
+        seen_ids = set()
+        for doc in docs:
+            q_id = doc.metadata.get('question_id')
+            if q_id and q_id not in seen_ids:
+                if q_id in self.questions_by_id:
+                    full_questions.append(self.questions_by_id[q_id])
+                    seen_ids.add(q_id)
+        # Sort by poll date, then by position, to maintain survey order
+        full_questions.sort(key=lambda q: (q.get('poll_date', ''), q.get('position', 0)))
+        return {
+            'source_questions': full_questions,
+            'num_sources': len(full_questions),
+            'filters_applied': filters or {},
+            'retrieved_docs': docs
+        }
     def _query_internal(
         self,
         question: str,

survey_agent.py CHANGED Viewed

@@ -13,8 +13,8 @@ When new pipelines (toplines, crosstabs, SQL) become available:
 Current Status:
 - ✅ Questionnaire pipeline: ACTIVE
-- ⏳ Toplines pipeline: Not yet implemented
-- ⏳ Crosstabs pipeline: Not yet implemented
 - ⏳ SQL pipeline: Not yet implemented
 """
@@ -32,6 +32,21 @@ from pydantic import BaseModel, Field, ConfigDict
 # Import the questionnaire RAG
 from questionnaire_rag import QuestionnaireRAG
 try:
     from dotenv import load_dotenv
@@ -47,7 +62,7 @@ except ImportError:
 class QueryFilters(BaseModel):
     """Filters for data source queries - Pydantic v2 with strict schema"""
     model_config = ConfigDict(extra="forbid")
     year: Optional[int] = Field(default=None, description="Year filter (e.g., 2025)")
     month: Optional[str] = Field(default=None, description="Month filter (e.g., 'February')")
     poll_date: Optional[str] = Field(default=None, description="Specific poll date (e.g., '2025-02-15')")
@@ -59,7 +74,7 @@ class QueryFilters(BaseModel):
 class DataSource(BaseModel):
     """Represents a data source to query"""
     model_config = ConfigDict(extra="forbid")
     source_type: Literal["questionnaire", "toplines", "crosstabs", "sql"]
     query_description: str = Field(description="What to retrieve from this source")
     filters: QueryFilters = Field(default_factory=QueryFilters, description="Filters to apply")
@@ -69,13 +84,13 @@ class DataSource(BaseModel):
 class ResearchStage(BaseModel):
     """A single stage in a multi-stage research plan"""
     model_config = ConfigDict(extra="forbid")
     stage_number: int = Field(description="Stage number (1-indexed)")
     description: str = Field(description="What this stage accomplishes")
     data_sources: List[DataSource] = Field(description="Data sources to query in this stage")
     depends_on_stages: List[int] = Field(default_factory=list, description="Which prior stages this depends on")
     use_previous_results_for: Optional[str] = Field(
-        default=None,
         description="How to use previous stage results (e.g., 'Extract question IDs from stage 1')"
     )
@@ -83,14 +98,14 @@ class ResearchStage(BaseModel):
 class ResearchBrief(BaseModel):
     """Research brief - can be either single-stage or multi-stage"""
     model_config = ConfigDict(extra="forbid")
     action: Literal["answer", "followup", "route_to_sources", "execute_stages"]
     followup_question: Optional[str] = Field(default=None, description="Follow-up question to ask user")
     reasoning: str = Field(description="Why this approach was chosen")
     # For simple queries (single-stage)
     data_sources: List[DataSource] = Field(default_factory=list, description="Data sources for simple queries")
     # For complex queries (multi-stage)
     stages: List[ResearchStage] = Field(default_factory=list, description="Ordered stages of research")
@@ -98,7 +113,7 @@ class ResearchBrief(BaseModel):
 class StageResult(BaseModel):
     """Results from executing one stage"""
     model_config = ConfigDict(extra="forbid")
     stage_number: int
     status: Literal["success", "partial", "failed"]
     questionnaire_results: Optional[Dict[str, Any]] = None
@@ -114,7 +129,7 @@ class StageResult(BaseModel):
 class VerificationResult(BaseModel):
     """Result of verifying if data answers the question"""
     model_config = ConfigDict(extra="forbid")
     answers_question: bool = Field(description="Whether the data fully answers the question")
     missing_info: Optional[str] = Field(default=None, description="What information is missing")
     improvement_suggestion: Optional[str] = Field(default=None, description="How to improve the research brief")
@@ -125,24 +140,24 @@ class SurveyAnalysisState(TypedDict):
     # User interaction
     messages: Annotated[List, operator.add]
     user_question: str
     # Planning
     research_brief: Optional[ResearchBrief]
     # Stage execution
     current_stage: int  # Which stage we're executing (0-indexed internally, but 1-indexed in models)
     stage_results: List[StageResult]  # Results from each completed stage
     # Legacy single-stage results (for backward compatibility)
     questionnaire_results: Optional[Dict[str, Any]]
     toplines_results: Optional[Dict[str, Any]]
     crosstabs_results: Optional[Dict[str, Any]]
     sql_results: Optional[Dict[str, Any]]
     # Verification & synthesis
     verification: Optional[VerificationResult]
     final_answer: Optional[str]
     # Control flow
     retry_count: int
     max_retries: int
@@ -155,7 +170,7 @@ class SurveyAnalysisState(TypedDict):
 class SurveyAnalysisAgent:
     """
     Multi-agent system for analyzing survey data with staged research briefs.
     Flow:
     1. User asks question
     2. Research brief agent decides: simple (one-shot) or complex (staged)
@@ -163,12 +178,12 @@ class SurveyAnalysisAgent:
     4. For complex: execute stages sequentially, each using previous results
     5. Final synthesis combines all stage results
     """
     # TODO: REMOVE WHEN PIPELINES READY - START
     # Track which pipelines are currently available
-    AVAILABLE_PIPELINES = {"questionnaire"}  # Add "toplines", "crosstabs", "sql" as they become ready
     # TODO: REMOVE WHEN PIPELINES READY - END
     def __init__(
         self,
         openai_api_key: str,
@@ -181,13 +196,13 @@ class SurveyAnalysisAgent:
         self.pinecone_api_key = pinecone_api_key
         self.verbose = verbose
         self.max_retries = max_retries
         # Initialize LLM
         self.llm = ChatOpenAI(
             model=os.getenv("OPENAI_MODEL", "gpt-4o"),
             temperature=0
         )
         # Initialize questionnaire RAG
         if self.verbose:
             print("Initializing questionnaire RAG system...")
@@ -197,28 +212,38 @@ class SurveyAnalysisAgent:
             persist_directory=questionnaire_persist_dir,
             verbose=verbose
         )
         # Build the graph
         self.graph = self._build_graph()
         if self.verbose:
             print("✓ Survey analysis agent initialized with staged research capability")
     def _build_graph(self) -> StateGraph:
         """Build the LangGraph workflow with staged research support"""
         workflow = StateGraph(SurveyAnalysisState)
         # Add nodes
         workflow.add_node("generate_research_brief", self._generate_research_brief)
         workflow.add_node("execute_stage", self._execute_stage)
         workflow.add_node("extract_stage_context", self._extract_stage_context)
         workflow.add_node("verify_results", self._verify_results)
         workflow.add_node("synthesize_response", self._synthesize_response)
         # Define edges
         workflow.add_edge(START, "generate_research_brief")
         # After research brief, route based on action
         workflow.add_conditional_edges(
             "generate_research_brief",
@@ -229,10 +254,10 @@ class SurveyAnalysisAgent:
                 "execute_stage": "execute_stage"
             }
         )
         # After stage execution, extract context for next stage
         workflow.add_edge("execute_stage", "extract_stage_context")
         # After context extraction, decide next step
         workflow.add_conditional_edges(
             "extract_stage_context",
@@ -242,7 +267,7 @@ class SurveyAnalysisAgent:
                 "verify": "verify_results"       # All stages done, verify
             }
         )
         # After verification, decide next step
         workflow.add_conditional_edges(
             "verify_results",
@@ -253,27 +278,66 @@ class SurveyAnalysisAgent:
                 "give_up": "synthesize_response"
             }
         )
         # End after synthesis
         workflow.add_edge("synthesize_response", END)
         # Compile with memory
         memory = MemorySaver()
         return workflow.compile(checkpointer=memory)
     def _get_available_surveys_description(self) -> str:
         """Get formatted description of available surveys for LLM prompt"""
         survey_names = self.questionnaire_rag.get_available_survey_names()
         if not survey_names:
             return "No surveys currently loaded."
         lines = ["Available survey names in the system:"]
         for name in survey_names:
             lines.append(f"  - '{name}'")
         return "\n".join(lines)
     # TODO: REMOVE WHEN PIPELINES READY - START
     def _get_pipeline_status_description(self) -> str:
         """Get description of available vs unavailable pipelines"""
@@ -283,61 +347,101 @@ class SurveyAnalysisAgent:
             "crosstabs": "Pre-computed cross-tabulations by demographics",
             "sql": "Raw survey responses for custom analysis"
         }
         lines = []
         for pipeline, description in all_pipelines.items():
             status = "✅ AVAILABLE" if pipeline in self.AVAILABLE_PIPELINES else "❌ NOT YET AVAILABLE"
             lines.append(f"{pipeline.capitalize()}: {description} {status}")
         return "\n".join(lines)
     # TODO: REMOVE WHEN PIPELINES READY - END
     def _get_full_question_context(self, state: SurveyAnalysisState) -> str:
         """
         Build full question context from conversation history.
         IMPORTANT: Only look at the LATEST user message for the current query.
         Previous messages provide context but the latest message is what we're answering.
         """
         messages = state.get("messages", [])
         # Extract all human messages
         human_messages = []
         for msg in messages:
             if isinstance(msg, HumanMessage):
                 human_messages.append(msg.content)
         if not human_messages:
             return state["user_question"]
         # For planning, just use the latest message
         # Don't combine with previous messages as that causes misinterpretation
         latest_message = human_messages[-1]
         if self.verbose:
             print(f"📝 Conversation history: {len(human_messages)} user message(s)")
             for i, msg in enumerate(human_messages, 1):
                 print(f"   {i}. {msg[:100]}..." if len(msg) > 100 else f"   {i}. {msg}")
             print(f"🎯 Answering latest: {latest_message}")
         return latest_message
     # ========================================================================
     # NODE FUNCTIONS
     # ========================================================================
     def _generate_research_brief(self, state: SurveyAnalysisState) -> Dict[str, Any]:
         """Generate research brief - decides single-stage vs multi-stage approach"""
         if self.verbose:
             print("\n=== GENERATING RESEARCH BRIEF ===")
-        # Get the current question (latest message only)
         question = self._get_full_question_context(state)
         retry_count = state.get("retry_count", 0)
         # Add context from verification if this is a retry
         verification_context = ""
         if state.get("verification") and retry_count > 0:
@@ -348,93 +452,34 @@ Previous attempt was insufficient:
 Please improve the research plan based on this feedback.
 """
-        system_prompt = f"""You are a research planning expert for survey data analysis.
-# TODO: REMOVE WHEN PIPELINES READY - Use dynamic status
-Available data sources:
-{self._get_pipeline_status_description()}
-# TODO: REMOVE WHEN PIPELINES READY - START
-⚠️ IMPORTANT: Currently ONLY the questionnaire pipeline is available.
-- Do NOT create research plans that require toplines, crosstabs, or SQL
-- If the user asks for results/data/analysis that requires those sources, use action="followup" to inform them
-- Focus on what CAN be answered with questionnaires alone (question text, response options, topics, skip logic)
-# TODO: REMOVE WHEN PIPELINES READY - END
-{self._get_available_surveys_description()}
-You have FOUR possible actions:
-**1. followup** - Ask clarifying question if ambiguous OR if user asks for unavailable data
-**2. answer** - Answer directly without data (system questions, general knowledge)
-**3. route_to_sources** - Simple query that can be answered with one-shot data retrieval
-   Use this for MOST queries including:
-   - "What questions were asked in June 2025?" ← Use this
-   - "Show me all healthcare questions" ← Use this
-   - "What about June 2025?" (when June 2022 was discussed before) ← Use this for the NEW date only
-   - ANY query asking about a SINGLE time period or survey ← Use this
-   CRITICAL: When user mentions a new time period, retrieve ONLY that period.
-   Do NOT create comparative queries unless explicitly requested!
-**4. execute_stages** - Complex query requiring SEQUENTIAL staged research
-   Use this ONLY for:
-   - Explicit comparisons: "compare 2024 vs 2025", "what changed between surveys"
-   - Queries with "most/least/best/worst" needing analysis across multiple retrievals
-   - Queries explicitly asking for differences or changes
-   DO NOT use stages for simple follow-up questions about different time periods!
-   # TODO: REMOVE WHEN PIPELINES READY - START
-   NOTE: Since toplines/crosstabs/SQL aren't available, only use execute_stages for explicit comparisons
-   # TODO: REMOVE WHEN PIPELINES READY - END
-CRITICAL RULES FOR CONVERSATION CONTEXT:
-- When user says "what about [X]?" they're asking a NEW question about X
-- Do NOT assume they want to compare with previous topics
-- "What about June 2025?" means "show me June 2025" (NOT "compare with June 2022")
-- Only create multi-stage queries when user EXPLICITLY asks to compare
-FILTERING RULES:
-- Extract survey name from user query and map to exact stored name
-- "Unity Poll" → "Vanderbilt_Unity_Poll"
-- Be precise with year and month extraction
-- If user just provides date/survey, infer they want questions from that period
-{verification_context}
-Examples:
-User: "what questions were asked in June 2025?"
-Brief: action=route_to_sources, retrieve June 2025 questions
-User: "what about June 2025?" (after discussing June 2022)
-Brief: action=route_to_sources, retrieve June 2025 questions ← NOT staged!
-User: "compare June 2024 vs June 2025"
-Brief: action=execute_stages, stage 1: 2024, stage 2: 2025 ← This needs stages
-User: "june 2022 unity poll"
-Brief: action=route_to_sources, year=2022, month=June, survey_name='Vanderbilt_Unity_Poll'
-"""
         brief_generator = self.llm.with_structured_output(ResearchBrief)
         brief = brief_generator.invoke([
             SystemMessage(content=system_prompt),
-            HumanMessage(content=f"User question: {question}\n\nGenerate a research brief.")
         ])
         if self.verbose:
             print(f"Action: {brief.action}")
             print(f"Reasoning: {brief.reasoning}")
             if brief.followup_question:
                 print(f"Follow-up: {brief.followup_question}")
             if brief.action == "route_to_sources" and brief.data_sources:
                 print(f"Simple query - {len(brief.data_sources)} data sources")
                 for ds in brief.data_sources:
@@ -442,7 +487,7 @@ Brief: action=route_to_sources, year=2022, month=June, survey_name='Vanderbilt_U
                     print(f"  - {ds.source_type}: {ds.query_description}")
                     if filters_dict:
                         print(f"    Filters: {filters_dict}")
             if brief.action == "execute_stages" and brief.stages:
                 print(f"Staged query - {len(brief.stages)} stages")
                 for stage in brief.stages:
@@ -455,18 +500,18 @@ Brief: action=route_to_sources, year=2022, month=June, survey_name='Vanderbilt_U
                         print(f"  - {ds.source_type}: {ds.query_description}")
                         if ds.result_label:
                             print(f"    Label: {ds.result_label}")
         return {
             "research_brief": brief,
             "current_stage": 0,  # Start at stage 0 (will execute stage 1 first)
             "stage_results": [],
             "messages": [AIMessage(content=f"[Research plan: {brief.action}]")]
         }
     def _route_after_brief(self, state: SurveyAnalysisState) -> str:
         """Route based on research brief action"""
         brief = state["research_brief"]
         if brief.action == "followup":
             return "followup"
         elif brief.action == "answer":
@@ -475,34 +520,34 @@ Brief: action=route_to_sources, year=2022, month=June, survey_name='Vanderbilt_U
             return "execute_stage"
         else:  # route_to_sources
             return "execute_stage"  # We'll handle both single and staged in execute_stage
     def _execute_stage(self, state: SurveyAnalysisState) -> Dict[str, Any]:
         """Execute one stage of research (handles both single-stage and multi-stage)"""
         brief = state["research_brief"]
         current_stage_idx = state.get("current_stage", 0)
         previous_stage_results = state.get("stage_results", [])
         # Determine if this is single-stage or multi-stage
         if brief.action == "route_to_sources":
             # Single-stage: use data_sources directly
             if self.verbose:
                 print(f"\n=== EXECUTING SINGLE-STAGE RESEARCH ===")
             stage_data_sources = brief.data_sources
             stage_desc = "Single-stage retrieval"
         elif brief.action == "execute_stages":
             # Multi-stage: get current stage
             stage = brief.stages[current_stage_idx]
             if self.verbose:
                 print(f"\n=== EXECUTING STAGE {stage.stage_number}/{len(brief.stages)} ===")
                 print(f"Description: {stage.description}")
             stage_data_sources = stage.data_sources
             stage_desc = stage.description
             # If this stage depends on previous stages, enrich filters with context
             if stage.use_previous_results_for and previous_stage_results:
                 stage_data_sources = self._enrich_data_sources_with_context(
@@ -512,48 +557,52 @@ Brief: action=route_to_sources, year=2022, month=June, survey_name='Vanderbilt_U
                 )
         else:
             return {}
         # Execute pipelines for this stage
         stage_result = StageResult(
             stage_number=current_stage_idx + 1,
             status="success"
         )
         # TODO: REMOVE WHEN PIPELINES READY - Track what was attempted vs available
         attempted_pipelines = []
         unavailable_pipelines = []
         # Run each pipeline
         for ds in stage_data_sources:
             filters_dict = {k: v for k, v in ds.filters.model_dump().items() if v is not None}
             # TODO: REMOVE WHEN PIPELINES READY - START
             attempted_pipelines.append(ds.source_type)
             # TODO: REMOVE WHEN PIPELINES READY - END
             if ds.source_type == "questionnaire":
                 if self.verbose:
-                    print(f"\nQuerying questionnaire: {ds.query_description}")
                     if filters_dict:
-                        print(f"Filters: {filters_dict}")
-                result = self.questionnaire_rag.query_with_metadata(
                     question=ds.query_description,
                     filters=filters_dict if filters_dict else None
                 )
                 # Store with label if provided
                 if ds.result_label:
                     result["label"] = ds.result_label
                 stage_result.questionnaire_results = result if stage_result.questionnaire_results is None else {
                     "multiple": True,
                     "results": [stage_result.questionnaire_results, result]
                 }
                 if self.verbose:
-                    print(f"Retrieved {result['num_sources']} questions")
             # TODO: REMOVE WHEN PIPELINES READY - START
             elif ds.source_type not in self.AVAILABLE_PIPELINES:
                 unavailable_pipelines.append(ds.source_type)
@@ -561,7 +610,103 @@ Brief: action=route_to_sources, year=2022, month=June, survey_name='Vanderbilt_U
                     print(f"\n⚠️  {ds.source_type.upper()} pipeline not yet available - skipping")
                     print(f"   Requested: {ds.query_description}")
             # TODO: REMOVE WHEN PIPELINES READY - END
         # TODO: REMOVE WHEN PIPELINES READY - START
         # Add a note about unavailable pipelines to the stage result
         if unavailable_pipelines:
@@ -573,10 +718,10 @@ Brief: action=route_to_sources, year=2022, month=June, survey_name='Vanderbilt_U
                 stage_result.extracted_context = {}
             stage_result.extracted_context["unavailable_pipelines"] = unavailable_pipelines
         # TODO: REMOVE WHEN PIPELINES READY - END
         # Add stage result to list
         updated_stage_results = previous_stage_results + [stage_result]
         # For single-stage, also populate legacy fields
         if brief.action == "route_to_sources":
             return {
@@ -587,12 +732,12 @@ Brief: action=route_to_sources, year=2022, month=June, survey_name='Vanderbilt_U
                 "crosstabs_results": stage_result.crosstabs_results,
                 "sql_results": stage_result.sql_results
             }
         return {
             "stage_results": updated_stage_results,
             "current_stage": current_stage_idx + 1  # FIXED: Increment stage counter
         }
     def _enrich_data_sources_with_context(
         self,
         data_sources: List[DataSource],
@@ -600,10 +745,10 @@ Brief: action=route_to_sources, year=2022, month=June, survey_name='Vanderbilt_U
         use_instruction: str
     ) -> List[DataSource]:
         """Enrich data sources with context from previous stages"""
         if self.verbose:
             print(f"  Enriching with context: {use_instruction}")
         # For now, handle the most common case: extracting question IDs
         if "question" in use_instruction.lower() and "id" in use_instruction.lower():
             # Extract question IDs from previous questionnaire results
@@ -613,66 +758,66 @@ Brief: action=route_to_sources, year=2022, month=June, survey_name='Vanderbilt_U
                     q_results = prev_result.questionnaire_results
                     if "source_questions" in q_results:
                         question_ids.extend([q.get("question_id") for q in q_results["source_questions"]])
             if question_ids and self.verbose:
                 print(f"  Found {len(question_ids)} question IDs from previous stages")
             # Add question_ids to filters
             enriched_sources = []
             for ds in data_sources:
                 new_filters = ds.filters.model_copy()
                 new_filters.question_ids = question_ids if question_ids else None
                 enriched_ds = ds.model_copy()
                 enriched_ds.filters = new_filters
                 enriched_sources.append(enriched_ds)
             return enriched_sources
         return data_sources
     def _extract_stage_context(self, state: SurveyAnalysisState) -> Dict[str, Any]:
         """Extract key context from completed stage for use in next stages"""
         stage_results = state.get("stage_results", [])
         if not stage_results:
             return {}
         current_result = stage_results[-1]
         # Extract question IDs if questionnaire results exist
         extracted_context = {}
         if current_result.questionnaire_results:
             q_results = current_result.questionnaire_results
             if "source_questions" in q_results:
                 question_ids = [q.get("question_id") for q in q_results["source_questions"]]
                 extracted_context["question_ids"] = question_ids
                 if self.verbose:
                     print(f"\n=== EXTRACTED CONTEXT FROM STAGE {current_result.stage_number} ===")
                     print(f"Question IDs: {len(question_ids)} extracted")
         # Update the stage result with extracted context
         current_result.extracted_context = extracted_context
         return {}
     def _route_after_stage(self, state: SurveyAnalysisState) -> str:
         """Decide if we need to execute another stage or move to verification"""
         brief = state["research_brief"]
         current_stage_idx = state.get("current_stage", 0)
         # Single-stage query
         if brief.action == "route_to_sources":
             if self.verbose:
                 print("\n=== SINGLE-STAGE COMPLETE → VERIFICATION ===")
             return "verify"
         # Multi-stage query
         total_stages = len(brief.stages)
         # FIXED: Don't add 1 here since current_stage was already incremented
         if current_stage_idx < total_stages:
             if self.verbose:
@@ -682,84 +827,190 @@ Brief: action=route_to_sources, year=2022, month=June, survey_name='Vanderbilt_U
             if self.verbose:
                 print(f"\n=== ALL {total_stages} STAGES COMPLETE → VERIFICATION ===")
             return "verify"
     def _verify_results(self, state: SurveyAnalysisState) -> Dict[str, Any]:
         """Verify that retrieved data answers the question"""
         if self.verbose:
             print("\n=== VERIFYING RESULTS ===")
         # Use the latest question only
         question = self._get_full_question_context(state)
         stage_results = state.get("stage_results", [])
         brief = state["research_brief"]
-        # Build summary of what we retrieved
         retrieval_summary = []
         total_questions = 0
         # TODO: REMOVE WHEN PIPELINES READY - START
         unavailable_pipelines_found = []
         # TODO: REMOVE WHEN PIPELINES READY - END
         for stage_result in stage_results:
             if stage_result.questionnaire_results:
                 q_res = stage_result.questionnaire_results
-                num = q_res.get("num_sources", 0)
-                total_questions += num
-                retrieval_summary.append(f"Stage {stage_result.stage_number}: Retrieved {num} questions")
             # TODO: REMOVE WHEN PIPELINES READY - START
             # Check if any pipelines were unavailable
             if stage_result.extracted_context and "unavailable_pipelines" in stage_result.extracted_context:
                 unavailable = stage_result.extracted_context["unavailable_pipelines"]
                 unavailable_pipelines_found.extend(unavailable)
-                retrieval_summary.append(f"Stage {stage_result.stage_number}: ⚠️  {', '.join(unavailable)} not yet available")
             # TODO: REMOVE WHEN PIPELINES READY - END
         if not retrieval_summary:
             retrieval_summary.append("No data was retrieved")
-        # Simple heuristic: if this is a single-stage simple query and we got results, auto-pass
-        if brief.action == "route_to_sources" and len(stage_results) == 1 and total_questions > 0:
-            # Check if question is a simple "what questions" type query
-            question_lower = question.lower()
-            simple_patterns = ["what question", "which question", "list question", "show question", "questions asked"]
-            if any(pattern in question_lower for pattern in simple_patterns):
-                if self.verbose:
-                    print(f"✓ Auto-pass: Simple question retrieval with {total_questions} results")
-                return {
-                    "verification": VerificationResult(
-                        answers_question=True,
-                        missing_info=None,
-                        improvement_suggestion=None
-                    )
-                }
-        # TODO: REMOVE WHEN PIPELINES READY - START
-        # If we have unavailable pipelines but got questionnaire data, auto-pass with note
-        if unavailable_pipelines_found and total_questions > 0:
-            if self.verbose:
-                print(f"✓ Auto-pass: Got questionnaire data, {len(unavailable_pipelines_found)} pipeline(s) not yet available")
-            return {
-                "verification": VerificationResult(
-                    answers_question=True,
-                    missing_info=None,
-                    improvement_suggestion=None
-                )
-            }
-        # TODO: REMOVE WHEN PIPELINES READY - END
-        # If we got 0 results, auto-fail without calling LLM
-        # CRITICAL: Don't retry - set retry_count to max to skip retry loop
-        if total_questions == 0:
             if self.verbose:
                 print("✗ Auto-fail: No results retrieved (skipping retry - data doesn't exist)")
             return {
                 "verification": VerificationResult(
                     answers_question=False,
@@ -768,67 +1019,48 @@ Brief: action=route_to_sources, year=2022, month=June, survey_name='Vanderbilt_U
                 ),
                 "retry_count": state.get("max_retries", self.max_retries)  # FIXED: Skip retry
             }
-        # For other cases, use LLM verification
-        system_prompt = """You are a verification expert. Your ONLY job is to check if the retrieved data matches what the user asked for.
-CRITICAL RULES:
-1. **Match the question literally** - Don't add requirements the user didn't ask for
-   - If they asked "what questions were asked?" and we retrieved questions → SUCCESS
-   - If they asked "what are the results?" and we only have questions → FAILURE
-2. **Don't overthink it** - Keep it simple:
-   - Did we retrieve the type of data they asked for? (questions, results, etc.)
-   - Is it from the right time period/survey they specified?
-   - Is there enough data (at least 1 result)?
-3. **Only fail if there's an actual problem**:
-   - We retrieved the wrong type of data (e.g., questions when they asked for results)
-   - We retrieved from the wrong time period/survey
-4. **Do NOT fail if**:
-   - User asked for questions and we got questions (even if we don't have "analysis")
-   - User asked for data from June 2025 and that's what we got
-   - The data seems sufficient to answer their actual question
-Be practical, not pedantic. If the retrieved data can answer what they asked, approve it.
-"""
         verifier = self.llm.with_structured_output(VerificationResult)
-        verification = verifier.invoke([
-            SystemMessage(content=system_prompt),
-            HumanMessage(content=f"""
-User question: "{question}"
-What we retrieved:
-{chr(10).join(retrieval_summary)}
-Simple question: Can we answer their question with this data? YES or NO.
-""")
         ])
         if self.verbose:
             print(f"Answers question: {verification.answers_question}")
             if not verification.answers_question:
                 print(f"Missing: {verification.missing_info}")
                 print(f"Suggestion: {verification.improvement_suggestion}")
         # Increment retry count if verification fails
         updates = {"verification": verification}
         if not verification.answers_question:
             current_retry = state.get("retry_count", 0)
             updates["retry_count"] = current_retry + 1
         return updates
     def _route_after_verification(self, state: SurveyAnalysisState) -> str:
         """Route based on verification result"""
         verification = state["verification"]
         retry_count = state.get("retry_count", 0)
         max_retries = state.get("max_retries", self.max_retries)
         if verification.answers_question:
             return "synthesize"
         elif retry_count < max_retries:
@@ -839,18 +1071,18 @@ Simple question: Can we answer their question with this data? YES or NO.
             if self.verbose:
                 print(f"\n⚠️  Max retries reached, proceeding with partial results")
             return "give_up"
     def _synthesize_response(self, state: SurveyAnalysisState) -> Dict[str, Any]:
         """Synthesize final response from all results"""
         if self.verbose:
             print("\n=== SYNTHESIZING RESPONSE ===")
         brief = state["research_brief"]
         # Use the latest question
         full_question = self._get_full_question_context(state)
         # Handle followup action
         if brief.action == "followup":
             if self.verbose:
@@ -859,7 +1091,7 @@ Simple question: Can we answer their question with this data? YES or NO.
                 "final_answer": brief.followup_question,
                 "messages": [AIMessage(content=brief.followup_question)]
             }
         # Handle direct answer (no data retrieval)
         if brief.action == "answer":
             if self.verbose:
@@ -868,15 +1100,15 @@ Simple question: Can we answer their question with this data? YES or NO.
                 SystemMessage(content="Answer the user's question directly."),
                 HumanMessage(content=full_question)
             ]).content
             return {
                 "final_answer": answer,
                 "messages": [AIMessage(content=answer)]
             }
         # Get stage results
         stage_results = state.get("stage_results", [])
         if not stage_results:
             if self.verbose:
                 print("No stage results available")
@@ -884,49 +1116,153 @@ Simple question: Can we answer their question with this data? YES or NO.
                 "final_answer": "I was unable to retrieve any data to answer your question.",
                 "messages": [AIMessage(content="I was unable to retrieve any data to answer your question.")]
             }
-        # CASE 1: Single stage with single pipeline → return direct answer
-        if len(stage_results) == 1:
-            stage_result = stage_results[0]
-            # Check if only one pipeline returned data
-            pipelines_with_data = 0
-            direct_answer = None
-            if stage_result.questionnaire_results:
-                pipelines_with_data += 1
-                direct_answer = stage_result.questionnaire_results.get("answer")
-            if pipelines_with_data == 1 and direct_answer:
-                if self.verbose:
-                    print("Single stage, single pipeline - returning direct answer (no synthesis)")
-                return {
-                    "final_answer": direct_answer,
-                    "messages": [AIMessage(content=direct_answer)]
-                }
-        # CASE 2: Multiple stages or multiple pipelines → synthesize
         if self.verbose:
-            print(f"Synthesizing from {len(stage_results)} stage(s)")
-        # Build context from all stages
         context_parts = []
         # TODO: REMOVE WHEN PIPELINES READY - START
         unavailable_pipelines_overall = []
         # TODO: REMOVE WHEN PIPELINES READY - END
         for i, stage_result in enumerate(stage_results, 1):
             if stage_result.questionnaire_results:
                 q_res = stage_result.questionnaire_results
-                # Check if this is a labeled result
-                label = q_res.get("label", f"Stage {i}")
-                context_parts.append(f"\n=== {label.upper()} ===")
-                context_parts.append(f"Stage {i} results:")
-                context_parts.append(q_res.get("answer", "No answer available"))
             # TODO: REMOVE WHEN PIPELINES READY - START
             # Track unavailable pipelines for note in synthesis
             if stage_result.extracted_context and "unavailable_pipelines" in stage_result.extracted_context:
@@ -934,7 +1270,7 @@ Simple question: Can we answer their question with this data? YES or NO.
                 unavailable_pipelines_overall.extend(unavailable)
                 context_parts.append(f"\n⚠️  Note: {', '.join(unavailable)} data was requested but not yet available")
             # TODO: REMOVE WHEN PIPELINES READY - END
         # TODO: REMOVE WHEN PIPELINES READY - START
         unavailable_note = ""
         if unavailable_pipelines_overall:
@@ -947,57 +1283,46 @@ Simple question: Can we answer their question with this data? YES or NO.
 Please answer based on the questionnaire data that IS available, and note any limitations.
 """
         # TODO: REMOVE WHEN PIPELINES READY - END
-        synthesis_prompt = f"""Synthesize results from {'multiple stages' if len(stage_results) > 1 else 'the research'} to answer the user's question.
-User question: {full_question}
-Research plan: {brief.reasoning}
-Retrieved data:
-{chr(10).join(context_parts)}
-{unavailable_note}
-Instructions:
-- If this is a comparative query, clearly organize by the comparison dimensions
-- If this is an analytical query (most/least/best/worst), perform the analysis
-- Preserve important details from the research
-- Use natural language, be clear and organized
-- Cite which poll(s) or stage(s) information comes from
-- Do NOT make up information not in the retrieved data
-- TODO: REMOVE WHEN PIPELINES READY - If some data sources weren't available, clearly state this and explain what you CAN provide
-"""
         final_answer = self.llm.invoke([
-            SystemMessage(content="You are a survey data analyst synthesizing research results."),
             HumanMessage(content=synthesis_prompt)
         ]).content
         if self.verbose:
             print("Synthesis complete")
         return {
             "final_answer": final_answer,
             "messages": [AIMessage(content=final_answer)]
         }
     # ========================================================================
     # PUBLIC API
     # ========================================================================
     def query(self, question: str, thread_id: str = "default") -> str:
         """
         Query the survey analysis system.
         Args:
             question: User's question
             thread_id: Conversation thread ID for memory
         Returns:
             Answer string
         """
         # Create initial state for this turn
         initial_state = {
             "messages": [HumanMessage(content=question)],
@@ -1014,22 +1339,22 @@ Instructions:
             "retry_count": 0,
             "max_retries": self.max_retries
         }
         config = {
             "configurable": {"thread_id": thread_id},
             "recursion_limit": 50  # FIXED: Increased from default 25
         }
         if self.verbose:
             print(f"\n🧵 Thread ID: {thread_id}")
         final_state = self.graph.invoke(initial_state, config)
         return final_state["final_answer"]
     def stream_query(self, question: str, thread_id: str = "default"):
         """Stream the query execution for real-time updates"""
         initial_state = {
             "messages": [HumanMessage(content=question)],
             "user_question": question,
@@ -1045,12 +1370,12 @@ Instructions:
             "retry_count": 0,
             "max_retries": self.max_retries
         }
         config = {
             "configurable": {"thread_id": thread_id},
             "recursion_limit": 50  # FIXED: Increased from default 25
         }
         for event in self.graph.stream(initial_state, config):
             yield event
@@ -1062,37 +1387,37 @@ Instructions:
 def main():
     """Interactive CLI"""
     import sys
     openai_api_key = os.getenv("OPENAI_API_KEY")
     pinecone_api_key = os.getenv("PINECONE_API_KEY")
     if not openai_api_key or not pinecone_api_key:
         print("Error: Missing API keys")
         print("Set OPENAI_API_KEY and PINECONE_API_KEY environment variables")
         sys.exit(1)
     print("Initializing survey analysis agent...")
     agent = SurveyAnalysisAgent(
         openai_api_key=openai_api_key,
         pinecone_api_key=pinecone_api_key,
         verbose=True
     )
     print("\n" + "="*80)
     print("SURVEY ANALYSIS AGENT (WITH STAGED RESEARCH)")
     print("="*80)
     print("\nType 'quit' to exit\n")
     thread_id = "cli_session"
     while True:
         try:
             question = input("\nYour question: ").strip()
             if not question or question.lower() in ['quit', 'exit', 'q']:
                 print("\nGoodbye!")
                 break
             print("\n" + "-"*80)
             answer = agent.query(question, thread_id=thread_id)
             print("\n" + "="*80)
@@ -1100,7 +1425,7 @@ def main():
             print("="*80)
             print(answer)
             print("="*80)
         except KeyboardInterrupt:
             print("\n\nGoodbye!")
             break
@@ -1111,4 +1436,4 @@ def main():
 if __name__ == "__main__":
-    main()

 Current Status:
 - ✅ Questionnaire pipeline: ACTIVE
+- ✅ Toplines pipeline: ACTIVE
+- ✅ Crosstabs pipeline: ACTIVE
 - ⏳ SQL pipeline: Not yet implemented
 """
 # Import the questionnaire RAG
 from questionnaire_rag import QuestionnaireRAG
+from toplines_rag import ToplinesRAG
+from crosstab_rag import CrosstabsRAG
+# ============================================================================
+# PROMPT LOADING UTILITIES
+# ============================================================================
+def _load_prompt_file(filename: str) -> str:
+    """Load a prompt file from the prompts directory"""
+    prompt_dir = Path(__file__).parent / "prompts"
+    prompt_path = prompt_dir / filename
+    if not prompt_path.exists():
+        raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
+    return prompt_path.read_text(encoding="utf-8")
 try:
     from dotenv import load_dotenv
 class QueryFilters(BaseModel):
     """Filters for data source queries - Pydantic v2 with strict schema.
     Every field is optional and defaults to None. ``_execute_stage`` dumps
     the model with ``model_dump()`` and drops the None-valued entries before
     passing the remaining filters to a retrieval pipeline.
     """
     # extra="forbid": per Pydantic, unknown keys fail validation rather than being ignored.
     model_config = ConfigDict(extra="forbid")
     year: Optional[int] = Field(default=None, description="Year filter (e.g., 2025)")
     month: Optional[str] = Field(default=None, description="Month filter (e.g., 'February')")
     poll_date: Optional[str] = Field(default=None, description="Specific poll date (e.g., '2025-02-15')")
 class DataSource(BaseModel):
     """Represents a data source to query.
     One retrieval request: which pipeline to use, what to ask it, and the
     filters that narrow the search.
     """
     model_config = ConfigDict(extra="forbid")
     # Pipeline selector; `_execute_stage` branches on this value.
     source_type: Literal["questionnaire", "toplines", "crosstabs", "sql"]
     # Handed to the selected pipeline as the query text.
     query_description: str = Field(description="What to retrieve from this source")
     # A fresh, empty QueryFilters is created per instance when none is supplied.
     filters: QueryFilters = Field(default_factory=QueryFilters, description="Filters to apply")
     # NOTE(review): other code in this file reads `ds.result_label`, but no such
     # field is declared in this view of the class - confirm it is defined.
 class ResearchStage(BaseModel):
     """A single stage in a multi-stage research plan"""
     model_config = ConfigDict(extra="forbid")
     stage_number: int = Field(description="Stage number (1-indexed)")
     description: str = Field(description="What this stage accomplishes")
     data_sources: List[DataSource] = Field(description="Data sources to query in this stage")
     depends_on_stages: List[int] = Field(default_factory=list, description="Which prior stages this depends on")
     use_previous_results_for: Optional[str] = Field(
+        default=None,
         description="How to use previous stage results (e.g., 'Extract question IDs from stage 1')"
     )
 class ResearchBrief(BaseModel):
     """Research brief - can be either single-stage or multi-stage.
     ``action`` decides which of the other fields are consumed:
       - "followup": ``followup_question`` is returned to the user as the answer.
       - "answer": the LLM answers directly, with no data retrieval.
       - "route_to_sources": ``data_sources`` are run as one single stage.
       - "execute_stages": ``stages`` are run one at a time, in order.
     """
     model_config = ConfigDict(extra="forbid")
     action: Literal["answer", "followup", "route_to_sources", "execute_stages"]
     followup_question: Optional[str] = Field(default=None, description="Follow-up question to ask user")
     # Required; printed in verbose mode when the brief is generated.
     reasoning: str = Field(description="Why this approach was chosen")
     # For simple queries (single-stage)
     data_sources: List[DataSource] = Field(default_factory=list, description="Data sources for simple queries")
     # For complex queries (multi-stage)
     stages: List[ResearchStage] = Field(default_factory=list, description="Ordered stages of research")
 class StageResult(BaseModel):
     """Results from executing one stage.
     Built by ``_execute_stage`` and later read by ``_synthesize_response``.
     """
     model_config = ConfigDict(extra="forbid")
     # 1-indexed: `_execute_stage` sets this to the 0-based stage index plus one.
     stage_number: int
     # Created as "success"; `_execute_stage` downgrades it to "partial" when a pipeline raises.
     status: Literal["success", "partial", "failed"]
     # Either one raw result dict from the questionnaire pipeline, or a
     # {"multiple": True, "results": [...]} wrapper when a stage runs more
     # than one questionnaire data source.
     questionnaire_results: Optional[Dict[str, Any]] = None
     # NOTE(review): `_execute_stage` also assigns `toplines_results` and
     # `extracted_context` on this model; neither is declared in this view of
     # the class - confirm both fields exist (extra="forbid" is set).
 class VerificationResult(BaseModel):
     """Result of verifying if data answers the question.
     ``answers_question`` drives the control flow after verification: when it
     is False the retry counter is incremented, and the router only proceeds
     straight to synthesis when it is True.
     """
     model_config = ConfigDict(extra="forbid")
     answers_question: bool = Field(description="Whether the data fully answers the question")
     # The two fields below are printed in verbose mode when verification fails.
     missing_info: Optional[str] = Field(default=None, description="What information is missing")
     improvement_suggestion: Optional[str] = Field(default=None, description="How to improve the research brief")
     # User interaction
     messages: Annotated[List, operator.add]
     user_question: str
     # Planning
     research_brief: Optional[ResearchBrief]
     # Stage execution
     current_stage: int  # Which stage we're executing (0-indexed internally, but 1-indexed in models)
     stage_results: List[StageResult]  # Results from each completed stage
     # Legacy single-stage results (for backward compatibility)
     questionnaire_results: Optional[Dict[str, Any]]
     toplines_results: Optional[Dict[str, Any]]
     crosstabs_results: Optional[Dict[str, Any]]
     sql_results: Optional[Dict[str, Any]]
     # Verification & synthesis
     verification: Optional[VerificationResult]
     final_answer: Optional[str]
     # Control flow
     retry_count: int
     max_retries: int
 class SurveyAnalysisAgent:
     """
     Multi-agent system for analyzing survey data with staged research briefs.
     Flow:
     1. User asks question
     2. Research brief agent decides: simple (one-shot) or complex (staged)
     4. For complex: execute stages sequentially, each using previous results
     5. Final synthesis combines all stage results
     """
     # TODO: REMOVE WHEN PIPELINES READY - START
     # Track which pipelines are currently available
+    AVAILABLE_PIPELINES = {"questionnaire", "toplines", "crosstabs"}  # Add "sql" as it becomes ready
     # TODO: REMOVE WHEN PIPELINES READY - END
     def __init__(
         self,
         openai_api_key: str,
         self.pinecone_api_key = pinecone_api_key
         self.verbose = verbose
         self.max_retries = max_retries
         # Initialize LLM
         self.llm = ChatOpenAI(
             model=os.getenv("OPENAI_MODEL", "gpt-4o"),
             temperature=0
         )
         # Initialize questionnaire RAG
         if self.verbose:
             print("Initializing questionnaire RAG system...")
             persist_directory=questionnaire_persist_dir,
             verbose=verbose
         )
+        # Initialize toplines RAG
+        if self.verbose:
+            print("Initializing toplines RAG system...")
+        self.toplines_rag = ToplinesRAG()
+        # Initialize crosstabs RAG (pass questionnaire_rag to reuse question matching)
+        if self.verbose:
+            print("Initializing crosstabs RAG system...")
+        self.crosstab_rag = CrosstabsRAG(questionnaire_rag=self.questionnaire_rag, verbose=self.verbose)
         # Build the graph
         self.graph = self._build_graph()
         if self.verbose:
             print("✓ Survey analysis agent initialized with staged research capability")
     def _build_graph(self) -> StateGraph:
         """Build the LangGraph workflow with staged research support"""
         workflow = StateGraph(SurveyAnalysisState)
         # Add nodes
         workflow.add_node("generate_research_brief", self._generate_research_brief)
         workflow.add_node("execute_stage", self._execute_stage)
         workflow.add_node("extract_stage_context", self._extract_stage_context)
         workflow.add_node("verify_results", self._verify_results)
         workflow.add_node("synthesize_response", self._synthesize_response)
         # Define edges
         workflow.add_edge(START, "generate_research_brief")
         # After research brief, route based on action
         workflow.add_conditional_edges(
             "generate_research_brief",
                 "execute_stage": "execute_stage"
             }
         )
         # After stage execution, extract context for next stage
         workflow.add_edge("execute_stage", "extract_stage_context")
         # After context extraction, decide next step
         workflow.add_conditional_edges(
             "extract_stage_context",
                 "verify": "verify_results"       # All stages done, verify
             }
         )
         # After verification, decide next step
         workflow.add_conditional_edges(
             "verify_results",
                 "give_up": "synthesize_response"
             }
         )
         # End after synthesis
         workflow.add_edge("synthesize_response", END)
         # Compile with memory
         memory = MemorySaver()
         return workflow.compile(checkpointer=memory)
     def _get_available_surveys_description(self) -> str:
         """Get formatted description of available surveys for LLM prompt"""
         survey_names = self.questionnaire_rag.get_available_survey_names()
         if not survey_names:
             return "No surveys currently loaded."
         lines = ["Available survey names in the system:"]
         for name in survey_names:
             lines.append(f"  - '{name}'")
+        return "\n".join(lines)
+    def _get_available_months_for_year(self, year: int, survey_name: str = "Vanderbilt_Unity_Poll") -> List[str]:
+        """Get list of available months for a given year and survey, sorted chronologically"""
+        month_order = ["January", "February", "March", "April", "May", "June",
+                      "July", "August", "September", "October", "November", "December"]
+        months = []
+        catalog = self.questionnaire_rag.poll_catalog
+        for poll_date, info in catalog.items():
+            if info.get("year") == year and info.get("survey_name") == survey_name:
+                month = info.get("month")
+                if month:
+                    months.append(month)
+        # Sort chronologically
+        return sorted(months, key=lambda m: month_order.index(m) if m in month_order else 999)
+    def _get_available_months_description(self) -> str:
+        """Get formatted description of available months by year for LLM prompt"""
+        month_order = ["January", "February", "March", "April", "May", "June",
+                      "July", "August", "September", "October", "November", "December"]
+        catalog = self.questionnaire_rag.poll_catalog
+        years = {}
+        for poll_date, info in catalog.items():
+            year = info.get("year")
+            month = info.get("month")
+            survey = info.get("survey_name")
+            if year and month and survey == "Vanderbilt_Unity_Poll":
+                if year not in years:
+                    years[year] = []
+                if month not in years[year]:
+                    years[year].append(month)
+        lines = ["Available polls by year (Vanderbilt Unity Poll):"]
+        for year in sorted(years.keys()):
+            # Sort months chronologically
+            months_sorted = sorted(years[year], key=lambda m: month_order.index(m) if m in month_order else 999)
+            months_str = ", ".join(months_sorted)
+            lines.append(f"  {year}: {months_str}")
         return "\n".join(lines)
     # TODO: REMOVE WHEN PIPELINES READY - START
     def _get_pipeline_status_description(self) -> str:
         """Get description of available vs unavailable pipelines"""
             "crosstabs": "Pre-computed cross-tabulations by demographics",
             "sql": "Raw survey responses for custom analysis"
         }
         lines = []
         for pipeline, description in all_pipelines.items():
             status = "✅ AVAILABLE" if pipeline in self.AVAILABLE_PIPELINES else "❌ NOT YET AVAILABLE"
             lines.append(f"{pipeline.capitalize()}: {description} {status}")
         return "\n".join(lines)
     # TODO: REMOVE WHEN PIPELINES READY - END
     def _get_full_question_context(self, state: SurveyAnalysisState) -> str:
         """
         Return the question that the current turn should answer.
         Only the LATEST user message is returned. Earlier user messages are
         listed in verbose output for context but are deliberately not merged
         into the result, because combining them caused misinterpretation.
         Falls back to ``state["user_question"]`` when the message history
         contains no human messages.
         """
         # Keep only the text of user-authored messages, in conversation order
         human_messages = [
             entry.content
             for entry in state.get("messages", [])
             if isinstance(entry, HumanMessage)
         ]
         if not human_messages:
             return state["user_question"]
         latest_message = human_messages[-1]
         # Quiet mode: nothing to log, hand back the latest message directly
         if not self.verbose:
             return latest_message
         print(f"📝 Conversation history: {len(human_messages)} user message(s)")
         for i, msg in enumerate(human_messages, 1):
             # Long messages are truncated to 100 characters in the log only
             if len(msg) > 100:
                 print(f"   {i}. {msg[:100]}...")
             else:
                 print(f"   {i}. {msg}")
         print(f"🎯 Answering latest: {latest_message}")
         return latest_message
     # ========================================================================
     # NODE FUNCTIONS
     # ========================================================================
     def _generate_research_brief(self, state: SurveyAnalysisState) -> Dict[str, Any]:
         """Generate research brief - decides single-stage vs multi-stage approach"""
         if self.verbose:
             print("\n=== GENERATING RESEARCH BRIEF ===")
+        # Get conversation history for context
+        messages = state.get("messages", [])
+        human_messages = [msg.content for msg in messages if isinstance(msg, HumanMessage)]
+        # Get the current question (latest message)
         question = self._get_full_question_context(state)
+        # Build conversation context for the prompt
+        conversation_context = ""
+        if len(human_messages) > 1:
+            conversation_context = "\n\nCONVERSATION HISTORY (for context):\n"
+            previous_questions = []
+            for i, msg in enumerate(human_messages[:-1], 1):  # All except the latest
+                conversation_context += f"  {i}. {msg}\n"
+                previous_questions.append(msg)
+            conversation_context += f"\nCurrent question: {question}\n"
+            # Check if current question is a short answer (like "June", "February", "2024")
+            # This suggests it's answering a previous followup question
+            is_short_answer = len(question.split()) <= 2 and any(word.lower() in ['june', 'february', 'march', 'april', 'may', 'july', 'august', 'september', 'october', 'november', 'december', 'january'] or word.isdigit() for word in question.split())
+            if is_short_answer and previous_questions:
+                # Reconstruct the original intent from the previous question
+                original_question = previous_questions[-1]
+                conversation_context += f"\n🚨 IMPORTANT: The current question '{question}' is a SHORT ANSWER to a previous followup.\n"
+                conversation_context += f"Original question was: '{original_question}'\n"
+                conversation_context += f"You MUST combine '{question}' with the original intent from '{original_question}'.\n"
+                conversation_context += "- If original question asked about 'approval', 'ratings', 'responses', 'percentages' → use TOPLINES pipeline\n"
+                conversation_context += "- If original question asked about 'questions', 'what was asked' → use QUESTIONNAIRE pipeline\n"
+                conversation_context += "- If original question asked about 'vary by', 'breakdown by', 'by gender/age/etc' → use CROSSTABS pipeline\n"
+                conversation_context += "- Extract year/month from the short answer and combine with original question's intent\n"
+                conversation_context += f"- Example: Original='Trump's approval in 2025?', Current='June' → Query: 'Trump's approval in June 2025' using TOPLINES\n"
+            conversation_context += "\n⚠️ CRITICAL: Use conversation history to infer intent:\n"
+            conversation_context += "- If user says 'these questions', 'for each of these', 'all of them', etc., they're referencing previous results\n"
+            conversation_context += "- Infer time periods (year, month) from previous USER QUESTIONS if current question doesn't specify\n"
+            conversation_context += "- Example: If previous question was 'what questions about economy in 2025?', infer that 2025 economy questions were retrieved\n"
+            conversation_context += "- For 2025, available months are: February, June (from the available polls description)\n"
+            conversation_context += "- When user asks 'how do responses vary by X for each of these questions', create stages for EACH available month from the previous question's time period\n"
+            conversation_context += "- If previous question mentioned a year (e.g., 'in 2025'), use ALL available months for that year in the current analysis\n"
+            conversation_context += "- DO NOT ask followup for month/year if you can infer it from the previous user question\n"
         retry_count = state.get("retry_count", 0)
         # Add context from verification if this is a retry
         verification_context = ""
         if state.get("verification") and retry_count > 0:
 Please improve the research plan based on this feedback.
 """
+        # Load research brief prompt from file
+        system_prompt_template = _load_prompt_file("research_brief_prompt.txt")
+        system_prompt = system_prompt_template.format(
+            available_pipelines=self._get_pipeline_status_description(),
+            available_surveys=self._get_available_surveys_description(),
+            available_months=self._get_available_months_description(),
+            verification_context=verification_context
+        )
         brief_generator = self.llm.with_structured_output(ResearchBrief)
+        user_prompt = f"User question: {question}\n\nGenerate a research brief."
+        if conversation_context:
+            user_prompt = conversation_context + "\n\n" + user_prompt
         brief = brief_generator.invoke([
             SystemMessage(content=system_prompt),
+            HumanMessage(content=user_prompt)
         ])
         if self.verbose:
             print(f"Action: {brief.action}")
             print(f"Reasoning: {brief.reasoning}")
             if brief.followup_question:
                 print(f"Follow-up: {brief.followup_question}")
             if brief.action == "route_to_sources" and brief.data_sources:
                 print(f"Simple query - {len(brief.data_sources)} data sources")
                 for ds in brief.data_sources:
                     print(f"  - {ds.source_type}: {ds.query_description}")
                     if filters_dict:
                         print(f"    Filters: {filters_dict}")
             if brief.action == "execute_stages" and brief.stages:
                 print(f"Staged query - {len(brief.stages)} stages")
                 for stage in brief.stages:
                         print(f"  - {ds.source_type}: {ds.query_description}")
                         if ds.result_label:
                             print(f"    Label: {ds.result_label}")
         return {
             "research_brief": brief,
             "current_stage": 0,  # Start at stage 0 (will execute stage 1 first)
             "stage_results": [],
             "messages": [AIMessage(content=f"[Research plan: {brief.action}]")]
         }
     def _route_after_brief(self, state: SurveyAnalysisState) -> str:
         """Route based on research brief action.
         Returns the routing key for the conditional edge that follows
         ``generate_research_brief``: "followup" for a follow-up brief,
         "execute_stage" for every other action.
         """
         brief = state["research_brief"]
         if brief.action == "followup":
             return "followup"
         elif brief.action == "answer":
             # NOTE(review): "answer" returns the same key as the data-retrieval
             # actions below. `_execute_stage` returns an empty update for
             # actions other than route_to_sources/execute_stages - confirm
             # this detour through the stage nodes is intentional.
             return "execute_stage"
         else:  # route_to_sources or execute_stages
             return "execute_stage"  # We'll handle both single and staged in execute_stage
     def _execute_stage(self, state: SurveyAnalysisState) -> Dict[str, Any]:
         """Execute one stage of research (handles both single-stage and multi-stage)"""
         brief = state["research_brief"]
         current_stage_idx = state.get("current_stage", 0)
         previous_stage_results = state.get("stage_results", [])
         # Determine if this is single-stage or multi-stage
         if brief.action == "route_to_sources":
             # Single-stage: use data_sources directly
             if self.verbose:
                 print(f"\n=== EXECUTING SINGLE-STAGE RESEARCH ===")
             stage_data_sources = brief.data_sources
             stage_desc = "Single-stage retrieval"
         elif brief.action == "execute_stages":
             # Multi-stage: get current stage
             stage = brief.stages[current_stage_idx]
             if self.verbose:
                 print(f"\n=== EXECUTING STAGE {stage.stage_number}/{len(brief.stages)} ===")
                 print(f"Description: {stage.description}")
             stage_data_sources = stage.data_sources
             stage_desc = stage.description
             # If this stage depends on previous stages, enrich filters with context
             if stage.use_previous_results_for and previous_stage_results:
                 stage_data_sources = self._enrich_data_sources_with_context(
                 )
         else:
             return {}
         # Execute pipelines for this stage
         stage_result = StageResult(
             stage_number=current_stage_idx + 1,
             status="success"
         )
         # TODO: REMOVE WHEN PIPELINES READY - Track what was attempted vs available
         attempted_pipelines = []
         unavailable_pipelines = []
         # Run each pipeline
         for ds in stage_data_sources:
             filters_dict = {k: v for k, v in ds.filters.model_dump().items() if v is not None}
             # TODO: REMOVE WHEN PIPELINES READY - START
             attempted_pipelines.append(ds.source_type)
             # TODO: REMOVE WHEN PIPELINES READY - END
             if ds.source_type == "questionnaire":
                 if self.verbose:
+                    print(f"\n📊 [Questionnaire Pipeline] Retrieving raw data from QuestionnaireRAG vectorstore")
+                    print(f"   Query: {ds.query_description}")
                     if filters_dict:
+                        print(f"   Filters: {filters_dict}")
+                result = self.questionnaire_rag.retrieve_raw_data(
                     question=ds.query_description,
                     filters=filters_dict if filters_dict else None
                 )
                 # Store with label if provided
                 if ds.result_label:
                     result["label"] = ds.result_label
                 stage_result.questionnaire_results = result if stage_result.questionnaire_results is None else {
                     "multiple": True,
                     "results": [stage_result.questionnaire_results, result]
                 }
                 if self.verbose:
+                    print(f"✅ [Questionnaire Pipeline] Retrieved {result['num_sources']} question(s) from QuestionnaireRAG vectorstore")
+                    if result.get("source_questions"):
+                        question_vars = [q.get("variable_name", "unknown") for q in result["source_questions"][:3]]
+                        print(f"   Variables: {', '.join(question_vars)}{' ...' if len(result['source_questions']) > 3 else ''}")
             # TODO: REMOVE WHEN PIPELINES READY - START
             elif ds.source_type not in self.AVAILABLE_PIPELINES:
                 unavailable_pipelines.append(ds.source_type)
                     print(f"\n⚠️  {ds.source_type.upper()} pipeline not yet available - skipping")
                     print(f"   Requested: {ds.query_description}")
             # TODO: REMOVE WHEN PIPELINES READY - END
+            # Toplines pipeline implementation (now available)
+            elif ds.source_type == "toplines":
+                if self.verbose:
+                    print(f"\n📊 [Toplines Pipeline] Retrieving raw data from ToplinesRAG vectorstore")
+                    print(f"   Query: {ds.query_description}")
+                    if filters_dict:
+                        print(f"   Filters: {filters_dict}")
+                try:
+                    # Retrieve raw data without LLM synthesis
+                    toplines_result = self.toplines_rag.retrieve_raw_data(
+                        query=ds.query_description,
+                        filters=filters_dict if filters_dict else None,
+                        top_k=10
+                    )
+                    if ds.result_label:
+                        toplines_result["label"] = ds.result_label
+                    stage_result.toplines_results = toplines_result if stage_result.toplines_results is None else {
+                        "multiple": True,
+                        "results": [stage_result.toplines_results, toplines_result]
+                    }
+                    if self.verbose:
+                        print(f"✅ [Toplines Pipeline] Retrieved {toplines_result['num_sources']} topline document(s) from ToplinesRAG vectorstore")
+                except Exception as e:
+                    if self.verbose:
+                        print(f"⚠️  Error querying toplines: {e}")
+                    stage_result.status = "partial"
+                    if not stage_result.extracted_context:
+                        stage_result.extracted_context = {}
+                    stage_result.extracted_context["toplines_error"] = str(e)
+            # Crosstabs pipeline implementation (now available)
+            elif ds.source_type == "crosstabs":
+                if self.verbose:
+                    print(f"\n📊 [Crosstabs Pipeline] Retrieving raw data from CrosstabsRAG")
+                    print(f"   Query description: {ds.query_description}")
+                    if filters_dict:
+                        print(f"   Filters: {filters_dict}")
+                try:
+                    # Build query string from query_description and filters
+                    # CrosstabsRAG.retrieve_raw_data() expects a full query string that includes year/month/poll info
+                    query_text = ds.query_description
+                    # Enhance query with filter information if available
+                    if filters_dict:
+                        filter_parts = []
+                        if "year" in filters_dict:
+                            filter_parts.append(str(filters_dict["year"]))
+                        if "month" in filters_dict:
+                            filter_parts.append(filters_dict["month"])
+                        if "survey_name" in filters_dict:
+                            filter_parts.append(filters_dict["survey_name"])
+                        if filter_parts:
+                            query_text = f"{query_text} {' '.join(filter_parts)}"
+                    # Retrieve raw data without LLM summarization
+                    crosstab_result = self.crosstab_rag.retrieve_raw_data(user_query=query_text, filters=filters_dict)
+                    # Handle error response
+                    if "error" in crosstab_result:
+                        if self.verbose:
+                            print(f"⚠️  [Crosstabs Pipeline] Query error: {crosstab_result['error']}")
+                        stage_result.status = "partial"
+                        if not stage_result.extracted_context:
+                            stage_result.extracted_context = {}
+                        stage_result.extracted_context["crosstabs_error"] = crosstab_result["error"]
+                    else:
+                        # Success - store raw data with label if provided
+                        if ds.result_label:
+                            crosstab_result["label"] = ds.result_label
+                        stage_result.crosstabs_results = crosstab_result if stage_result.crosstabs_results is None else {
+                            "multiple": True,
+                            "results": [stage_result.crosstabs_results, crosstab_result]
+                        }
+                        if self.verbose:
+                            matched_vars = crosstab_result.get("matched_variables", [])
+                            namespace = crosstab_result.get("namespace_used", "unknown")
+                            num_questions = len(matched_vars)
+                            print(f"✅ [Crosstabs Pipeline] Complete")
+                            print(f"   Matched variables: {', '.join(matched_vars[:3])}{' ...' if len(matched_vars) > 3 else ''}")
+                            print(f"   Namespace: {namespace}")
+                            print(f"   Questions with crosstab data: {num_questions}")
+                except Exception as e:
+                    if self.verbose:
+                        print(f"⚠️  Error querying crosstabs: {e}")
+                    stage_result.status = "partial"
+                    if not stage_result.extracted_context:
+                        stage_result.extracted_context = {}
+                    stage_result.extracted_context["crosstabs_error"] = str(e)
         # TODO: REMOVE WHEN PIPELINES READY - START
         # Add a note about unavailable pipelines to the stage result
         if unavailable_pipelines:
                 stage_result.extracted_context = {}
             stage_result.extracted_context["unavailable_pipelines"] = unavailable_pipelines
         # TODO: REMOVE WHEN PIPELINES READY - END
         # Add stage result to list
         updated_stage_results = previous_stage_results + [stage_result]
         # For single-stage, also populate legacy fields
         if brief.action == "route_to_sources":
             return {
                 "crosstabs_results": stage_result.crosstabs_results,
                 "sql_results": stage_result.sql_results
             }
         return {
             "stage_results": updated_stage_results,
             "current_stage": current_stage_idx + 1  # FIXED: Increment stage counter
         }
     def _enrich_data_sources_with_context(
         self,
         data_sources: List[DataSource],
         use_instruction: str
     ) -> List[DataSource]:
         """Enrich data sources with context from previous stages"""
         if self.verbose:
             print(f"  Enriching with context: {use_instruction}")
         # For now, handle the most common case: extracting question IDs
         if "question" in use_instruction.lower() and "id" in use_instruction.lower():
             # Extract question IDs from previous questionnaire results
                     q_results = prev_result.questionnaire_results
                     if "source_questions" in q_results:
                         question_ids.extend([q.get("question_id") for q in q_results["source_questions"]])
             if question_ids and self.verbose:
                 print(f"  Found {len(question_ids)} question IDs from previous stages")
             # Add question_ids to filters
             enriched_sources = []
             for ds in data_sources:
                 new_filters = ds.filters.model_copy()
                 new_filters.question_ids = question_ids if question_ids else None
                 enriched_ds = ds.model_copy()
                 enriched_ds.filters = new_filters
                 enriched_sources.append(enriched_ds)
             return enriched_sources
         return data_sources
     def _extract_stage_context(self, state: SurveyAnalysisState) -> Dict[str, Any]:
         """Extract key context from completed stage for use in next stages"""
         stage_results = state.get("stage_results", [])
         if not stage_results:
             return {}
         current_result = stage_results[-1]
         # Extract question IDs if questionnaire results exist
         extracted_context = {}
         if current_result.questionnaire_results:
             q_results = current_result.questionnaire_results
             if "source_questions" in q_results:
                 question_ids = [q.get("question_id") for q in q_results["source_questions"]]
                 extracted_context["question_ids"] = question_ids
                 if self.verbose:
                     print(f"\n=== EXTRACTED CONTEXT FROM STAGE {current_result.stage_number} ===")
                     print(f"Question IDs: {len(question_ids)} extracted")
         # Update the stage result with extracted context
         current_result.extracted_context = extracted_context
         return {}
     def _route_after_stage(self, state: SurveyAnalysisState) -> str:
         """Decide if we need to execute another stage or move to verification"""
         brief = state["research_brief"]
         current_stage_idx = state.get("current_stage", 0)
         # Single-stage query
         if brief.action == "route_to_sources":
             if self.verbose:
                 print("\n=== SINGLE-STAGE COMPLETE → VERIFICATION ===")
             return "verify"
         # Multi-stage query
         total_stages = len(brief.stages)
         # FIXED: Don't add 1 here since current_stage was already incremented
         if current_stage_idx < total_stages:
             if self.verbose:
             if self.verbose:
                 print(f"\n=== ALL {total_stages} STAGES COMPLETE → VERIFICATION ===")
             return "verify"
     def _verify_results(self, state: SurveyAnalysisState) -> Dict[str, Any]:
         """Verify that retrieved data answers the question"""
         if self.verbose:
             print("\n=== VERIFYING RESULTS ===")
         # Use the latest question only
         question = self._get_full_question_context(state)
         stage_results = state.get("stage_results", [])
         brief = state["research_brief"]
+        # Build detailed summary of raw data retrieved
         retrieval_summary = []
+        raw_data_details = []
+        filters_applied_list = []
         total_questions = 0
+        total_toplines_docs = 0
+        total_crosstab_variables = 0
         # TODO: REMOVE WHEN PIPELINES READY - START
         unavailable_pipelines_found = []
         # TODO: REMOVE WHEN PIPELINES READY - END
         for stage_result in stage_results:
+            stage_num = stage_result.stage_number
+            # Analyze questionnaire raw data
             if stage_result.questionnaire_results:
                 q_res = stage_result.questionnaire_results
+                # Handle multiple results
+                if isinstance(q_res, dict) and q_res.get("multiple"):
+                    all_q_results = q_res.get("results", [])
+                else:
+                    all_q_results = [q_res]
+                for q_result in all_q_results:
+                    num = q_result.get("num_sources", 0)
+                    total_questions += num
+                    source_questions = q_result.get("source_questions", [])
+                    filters_applied = q_result.get("filters_applied", {})
+                    # Collect filters applied
+                    if filters_applied:
+                        filters_applied_list.append(f"Stage {stage_num} Questionnaire: {filters_applied}")
+                    if num > 0:
+                        # Extract key details about retrieved questions
+                        question_topics = set()
+                        question_vars = []
+                        question_years = set()
+                        question_months = set()
+                        for q in source_questions[:5]:  # Sample first 5
+                            topics = q.get("topics", [])
+                            if isinstance(topics, str):
+                                topics = [t.strip() for t in topics.split(",")]
+                            question_topics.update(topics)
+                            question_vars.append(q.get("variable_name", "unknown"))
+                            if q.get("year"):
+                                question_years.add(str(q.get("year")))
+                            if q.get("month"):
+                                question_months.add(q.get("month"))
+                        topics_str = ", ".join(sorted(question_topics)[:5])
+                        vars_str = ", ".join(question_vars[:3])
+                        years_str = ", ".join(sorted(question_years)) if question_years else "unknown"
+                        months_str = ", ".join(sorted(question_months)) if question_months else "unknown"
+                        time_period_info = f"{months_str} {years_str}" if months_str != "unknown" else years_str
+                        retrieval_summary.append(
+                            f"Stage {stage_num}: {num} questionnaire question(s) "
+                            f"(time period: {time_period_info}, variables: {vars_str}{'...' if num > 3 else ''}, topics: {topics_str[:50]})"
+                        )
+                        raw_data_details.append(
+                            f"Stage {stage_num} Questionnaire Data:\n"
+                            f"- Number of questions: {num}\n"
+                            f"- Time period: {time_period_info}\n"
+                            f"- Sample variables: {vars_str}\n"
+                            f"- Topics covered: {topics_str[:100]}\n"
+                        )
+                    else:
+                        retrieval_summary.append(f"Stage {stage_num}: No questionnaire questions retrieved")
+            # Analyze toplines raw data
+            if stage_result.toplines_results:
+                t_res = stage_result.toplines_results
+                # Handle multiple results
+                if isinstance(t_res, dict) and t_res.get("multiple"):
+                    all_t_results = t_res.get("results", [])
+                else:
+                    all_t_results = [t_res]
+                for t_result in all_t_results:
+                    num_docs = t_result.get("num_sources", 0)
+                    total_toplines_docs += num_docs
+                    retrieved_docs = t_result.get("retrieved_docs", [])
+                    if num_docs > 0:
+                        # Extract key details from toplines documents
+                        sample_vars = []
+                        sample_responses = []
+                        for doc in retrieved_docs[:3]:
+                            var = doc.metadata.get("variable_name", "unknown")
+                            response = doc.metadata.get("response_label", "")
+                            sample_vars.append(var)
+                            if response:
+                                sample_responses.append(response[:30])
+                        vars_str = ", ".join(sample_vars)
+                        retrieval_summary.append(
+                            f"Stage {stage_num}: {num_docs} topline document(s) "
+                            f"(variables: {vars_str}{'...' if num_docs > 3 else ''})"
+                        )
+                        raw_data_details.append(
+                            f"Stage {stage_num} Toplines Data:\n"
+                            f"- Number of documents: {num_docs}\n"
+                            f"- Sample variables: {vars_str}\n"
+                            f"- Sample responses: {', '.join(sample_responses[:3])}\n"
+                        )
+                    else:
+                        retrieval_summary.append(f"Stage {stage_num}: No topline documents retrieved")
+            # Analyze crosstabs raw data
+            if stage_result.crosstabs_results:
+                c_res = stage_result.crosstabs_results
+                # Handle multiple results
+                if isinstance(c_res, dict) and c_res.get("multiple"):
+                    all_c_results = c_res.get("results", [])
+                else:
+                    all_c_results = [c_res]
+                for c_result in all_c_results:
+                    if "error" in c_result:
+                        retrieval_summary.append(f"Stage {stage_num}: Crosstabs error - {c_result['error']}")
+                        continue
+                    crosstab_docs_by_var = c_result.get("crosstab_docs_by_variable", {})
+                    matched_vars = c_result.get("matched_variables", [])
+                    total_crosstab_variables += len(matched_vars)
+                    if matched_vars:
+                        vars_str = ", ".join(matched_vars[:3])
+                        namespace = c_result.get("namespace_used", "unknown")
+                        retrieval_summary.append(
+                            f"Stage {stage_num}: {len(matched_vars)} crosstab variable(s) "
+                            f"(variables: {vars_str}{'...' if len(matched_vars) > 3 else ''}, namespace: {namespace})"
+                        )
+                        # Count total crosstab documents
+                        total_crosstab_docs = sum(
+                            len(var_data.get("crosstab_docs", []))
+                            for var_data in crosstab_docs_by_var.values()
+                        )
+                        raw_data_details.append(
+                            f"Stage {stage_num} Crosstabs Data:\n"
+                            f"- Number of variables: {len(matched_vars)}\n"
+                            f"- Variables: {vars_str}\n"
+                            f"- Total crosstab documents: {total_crosstab_docs}\n"
+                            f"- Namespace: {namespace}\n"
+                        )
+                    else:
+                        retrieval_summary.append(f"Stage {stage_num}: No crosstab data retrieved")
             # TODO: REMOVE WHEN PIPELINES READY - START
             # Check if any pipelines were unavailable
             if stage_result.extracted_context and "unavailable_pipelines" in stage_result.extracted_context:
                 unavailable = stage_result.extracted_context["unavailable_pipelines"]
                 unavailable_pipelines_found.extend(unavailable)
+                retrieval_summary.append(f"Stage {stage_num}: ⚠️  {', '.join(unavailable)} not yet available")
             # TODO: REMOVE WHEN PIPELINES READY - END
         if not retrieval_summary:
             retrieval_summary.append("No data was retrieved")
+        # Auto-fail if we got 0 results across all data types
+        if total_questions == 0 and total_toplines_docs == 0 and total_crosstab_variables == 0:
             if self.verbose:
                 print("✗ Auto-fail: No results retrieved (skipping retry - data doesn't exist)")
             return {
                 "verification": VerificationResult(
                     answers_question=False,
                 ),
                 "retry_count": state.get("max_retries", self.max_retries)  # FIXED: Skip retry
             }
+        # For other cases, use LLM verification with raw data details
+        system_prompt = _load_prompt_file("verification_prompt_system.txt")
         verifier = self.llm.with_structured_output(VerificationResult)
+        # Build detailed verification prompt with raw data
+        verification_prompt_template = _load_prompt_file("verification_prompt_user.txt")
+        filters_info = chr(10).join(filters_applied_list) if filters_applied_list else "No explicit filters applied"
+        verification_prompt = verification_prompt_template.format(
+            question=question,
+            retrieval_summary=chr(10).join(retrieval_summary),
+            raw_data_details=chr(10).join(raw_data_details) if raw_data_details else 'No detailed data available',
+            filters_applied=filters_info
+        )
+        verification = verifier.invoke([
+            SystemMessage(content=system_prompt),
+            HumanMessage(content=verification_prompt)
         ])
         if self.verbose:
             print(f"Answers question: {verification.answers_question}")
             if not verification.answers_question:
                 print(f"Missing: {verification.missing_info}")
                 print(f"Suggestion: {verification.improvement_suggestion}")
         # Increment retry count if verification fails
         updates = {"verification": verification}
         if not verification.answers_question:
             current_retry = state.get("retry_count", 0)
             updates["retry_count"] = current_retry + 1
         return updates
     def _route_after_verification(self, state: SurveyAnalysisState) -> str:
         """Route based on verification result"""
         verification = state["verification"]
         retry_count = state.get("retry_count", 0)
         max_retries = state.get("max_retries", self.max_retries)
         if verification.answers_question:
             return "synthesize"
         elif retry_count < max_retries:
             if self.verbose:
                 print(f"\n⚠️  Max retries reached, proceeding with partial results")
             return "give_up"
     def _synthesize_response(self, state: SurveyAnalysisState) -> Dict[str, Any]:
         """Synthesize final response from all results"""
         if self.verbose:
             print("\n=== SYNTHESIZING RESPONSE ===")
         brief = state["research_brief"]
         # Use the latest question
         full_question = self._get_full_question_context(state)
         # Handle followup action
         if brief.action == "followup":
             if self.verbose:
                 "final_answer": brief.followup_question,
                 "messages": [AIMessage(content=brief.followup_question)]
             }
         # Handle direct answer (no data retrieval)
         if brief.action == "answer":
             if self.verbose:
                 SystemMessage(content="Answer the user's question directly."),
                 HumanMessage(content=full_question)
             ]).content
             return {
                 "final_answer": answer,
                 "messages": [AIMessage(content=answer)]
             }
         # Get stage results
         stage_results = state.get("stage_results", [])
         if not stage_results:
             if self.verbose:
                 print("No stage results available")
                 "final_answer": "I was unable to retrieve any data to answer your question.",
                 "messages": [AIMessage(content="I was unable to retrieve any data to answer your question.")]
             }
+        # Always synthesize from raw data (removed direct answer path)
         if self.verbose:
+            print(f"Synthesizing from {len(stage_results)} stage(s) using raw data")
+        # Build context from raw data structures
         context_parts = []
         # TODO: REMOVE WHEN PIPELINES READY - START
         unavailable_pipelines_overall = []
         # TODO: REMOVE WHEN PIPELINES READY - END
         for i, stage_result in enumerate(stage_results, 1):
+            # Format questionnaire raw data
             if stage_result.questionnaire_results:
                 q_res = stage_result.questionnaire_results
+                # Handle multiple results
+                if isinstance(q_res, dict) and q_res.get("multiple"):
+                    all_q_results = q_res.get("results", [])
+                else:
+                    all_q_results = [q_res]
+                for q_result in all_q_results:
+                    label = q_result.get("label", f"Stage {i}")
+                    source_questions = q_result.get("source_questions", [])
+                    context_parts.append(f"\n=== {label.upper()} (QUESTIONNAIRE DATA) ===")
+                    if not source_questions:
+                        context_parts.append("No questionnaire questions retrieved.")
+                    else:
+                        context_parts.append(f"Retrieved {len(source_questions)} question(s):\n")
+                        # Format each question
+                        for j, q in enumerate(source_questions, 1):
+                            q_parts = [
+                                f"Question {j}: {q.get('question_text', 'N/A')}",
+                                f"Variable: {q.get('variable_name', 'N/A')}",
+                                f"Poll: {q.get('poll_date', 'N/A')}",
+                                f"Response Options: {' | '.join(q.get('response_options', []))}",
+                            ]
+                            topics = q.get("topics", [])
+                            if isinstance(topics, str):
+                                topics = [t.strip() for t in topics.split(",")]
+                            if topics:
+                                q_parts.append(f"Topics: {', '.join(topics)}")
+                            context_parts.append("\n".join(q_parts))
+                            context_parts.append("")  # Blank line between questions
+            # Format toplines raw data
+            if stage_result.toplines_results:
+                t_res = stage_result.toplines_results
+                # Handle multiple results
+                if isinstance(t_res, dict) and t_res.get("multiple"):
+                    all_t_results = t_res.get("results", [])
+                else:
+                    all_t_results = [t_res]
+                for t_result in all_t_results:
+                    label = t_result.get("label", f"Stage {i}")
+                    retrieved_docs = t_result.get("retrieved_docs", [])
+                    context_parts.append(f"\n=== {label.upper()} (TOPLINES DATA) ===")
+                    if not retrieved_docs:
+                        context_parts.append("No topline documents retrieved.")
+                    else:
+                        context_parts.append(f"Retrieved {len(retrieved_docs)} topline document(s):\n")
+                        # Format each topline document - include full content
+                        for j, doc in enumerate(retrieved_docs, 1):
+                            metadata = doc.metadata or {}
+                            content = doc.page_content or ""
+                            doc_parts = [
+                                f"--- Topline Document {j} ---",
+                                f"Survey: {metadata.get('survey_name', 'Vanderbilt Unity Poll')} ({metadata.get('month', '')} {metadata.get('year', '')})",
+                                f"Poll Date: {metadata.get('poll_date', 'N/A')}",
+                                f"Variable: {metadata.get('variable_name', 'N/A')}",
+                                f"Response: {metadata.get('response_label', 'N/A')}",
+                                f"Percentage: {metadata.get('pct', 'N/A')}%",
+                            ]
+                            if content:
+                                doc_parts.append(f"\nFull Content:")
+                                doc_parts.append(content)  # FULL content, no truncation
+                            context_parts.append("\n".join(doc_parts))
+                            context_parts.append("")  # Blank line between documents
+            # Format crosstabs raw data
+            if stage_result.crosstabs_results:
+                c_res = stage_result.crosstabs_results
+                # Handle multiple results
+                if isinstance(c_res, dict) and c_res.get("multiple"):
+                    all_c_results = c_res.get("results", [])
+                else:
+                    all_c_results = [c_res]
+                for c_result in all_c_results:
+                    if "error" in c_result:
+                        context_parts.append(f"\n=== Stage {i} (CROSSTABS DATA) ===")
+                        context_parts.append(f"Error: {c_result['error']}")
+                        continue
+                    label = c_result.get("label", f"Stage {i}")
+                    crosstab_docs_by_var = c_result.get("crosstab_docs_by_variable", {})
+                    matched_vars = c_result.get("matched_variables", [])
+                    namespace = c_result.get("namespace_used", "unknown")
+                    survey_info = c_result.get("survey_info", {})
+                    context_parts.append(f"\n=== {label.upper()} (CROSSTABS DATA) ===")
+                    context_parts.append(
+                        f"Survey: {survey_info.get('poll', 'Unknown')} "
+                        f"({survey_info.get('month', '')} {survey_info.get('year', '')})"
+                    )
+                    context_parts.append(f"Namespace: {namespace}")
+                    context_parts.append(f"Matched {len(matched_vars)} variable(s): {', '.join(matched_vars)}\n")
+                    if not crosstab_docs_by_var:
+                        context_parts.append("No crosstab documents retrieved.")
+                    else:
+                        # Format crosstab data for each variable - include ALL content for full analysis
+                        for var_name, var_data in crosstab_docs_by_var.items():
+                            crosstab_docs = var_data.get("crosstab_docs", [])
+                            question_text = var_data.get("question_text", "")
+                            context_parts.append(f"\n{'='*80}")
+                            context_parts.append(f"Variable: {var_name}")
+                            context_parts.append(f"Question: {question_text}")
+                            context_parts.append(f"{'='*80}\n")
+                            # Include ALL chunks with FULL content - no truncation
+                            # Sort by chunk_index to maintain order
+                            sorted_docs = sorted(crosstab_docs, key=lambda d: d.metadata.get("chunk_index", 999))
+                            for doc in sorted_docs:
+                                content = doc.page_content or ""
+                                chunk_idx = doc.metadata.get("chunk_index", "?")
+                                context_parts.append(f"--- Crosstab Data Chunk {chunk_idx} ---")
+                                context_parts.append(content)  # FULL content, no truncation
+                                context_parts.append("")  # Blank line between chunks
+                            context_parts.append("")  # Extra blank line between variables
             # TODO: REMOVE WHEN PIPELINES READY - START
             # Track unavailable pipelines for note in synthesis
             if stage_result.extracted_context and "unavailable_pipelines" in stage_result.extracted_context:
                 unavailable_pipelines_overall.extend(unavailable)
                 context_parts.append(f"\n⚠️  Note: {', '.join(unavailable)} data was requested but not yet available")
             # TODO: REMOVE WHEN PIPELINES READY - END
         # TODO: REMOVE WHEN PIPELINES READY - START
         unavailable_note = ""
         if unavailable_pipelines_overall:
 Please answer based on the questionnaire data that IS available, and note any limitations.
 """
         # TODO: REMOVE WHEN PIPELINES READY - END
+        synthesis_prompt_template = _load_prompt_file("synthesis_prompt_user.txt")
+        synthesis_prompt = synthesis_prompt_template.format(
+            stage_count='multiple stages' if len(stage_results) > 1 else 'the research',
+            full_question=full_question,
+            reasoning=brief.reasoning,
+            context_parts=chr(10).join(context_parts),
+            unavailable_note=unavailable_note
+        )
+        synthesis_system_prompt = _load_prompt_file("synthesis_prompt_system.txt")
         final_answer = self.llm.invoke([
+            SystemMessage(content=synthesis_system_prompt),
             HumanMessage(content=synthesis_prompt)
         ]).content
         if self.verbose:
             print("Synthesis complete")
         return {
             "final_answer": final_answer,
             "messages": [AIMessage(content=final_answer)]
         }
     # ========================================================================
     # PUBLIC API
     # ========================================================================
     def query(self, question: str, thread_id: str = "default") -> str:
         """
         Run a single question through the analysis graph and return the answer.
         Args:
             question: The user's question for this turn
             thread_id: Value passed to the graph as ``configurable.thread_id``
         Returns:
             The ``"final_answer"`` entry of the state returned by the graph
         """
         # Starting state for this turn: the new message and a fresh retry budget
         turn_state = dict(
             messages=[HumanMessage(content=question)],
             retry_count=0,
             max_retries=self.max_retries,
         )
         run_config = dict(
             configurable=dict(thread_id=thread_id),
             recursion_limit=50,  # raised from the default of 25
         )
         if self.verbose:
             print(f"\n🧵 Thread ID: {thread_id}")
         result_state = self.graph.invoke(turn_state, run_config)
         return result_state["final_answer"]
     def stream_query(self, question: str, thread_id: str = "default"):
         """Yield each event emitted by the graph while it processes the question"""
         run_config = {
             "configurable": {"thread_id": thread_id},
             "recursion_limit": 50  # raised above the default of 25
         }
         # Unlike query(), the streamed state also carries the raw question text
         turn_state = {
             "messages": [HumanMessage(content=question)],
             "user_question": question,
             "retry_count": 0,
             "max_retries": self.max_retries
         }
         yield from self.graph.stream(turn_state, run_config)
 def main():
     """Run an interactive command-line session with the survey analysis agent.

     Reads OPENAI_API_KEY and PINECONE_API_KEY from the environment and exits
     with status 1 when either is missing. Questions are read from stdin in a
     loop and answered within a single conversation thread ("cli_session").
     The loop ends on an empty line, 'quit', 'exit', 'q', Ctrl-C, or end of
     input (Ctrl-D / exhausted piped stdin).
     """
     import sys
     openai_api_key = os.getenv("OPENAI_API_KEY")
     pinecone_api_key = os.getenv("PINECONE_API_KEY")
     if not openai_api_key or not pinecone_api_key:
         print("Error: Missing API keys")
         print("Set OPENAI_API_KEY and PINECONE_API_KEY environment variables")
         sys.exit(1)
     print("Initializing survey analysis agent...")
     agent = SurveyAnalysisAgent(
         openai_api_key=openai_api_key,
         pinecone_api_key=pinecone_api_key,
         verbose=True
     )
     print("\n" + "="*80)
     print("SURVEY ANALYSIS AGENT (WITH STAGED RESEARCH)")
     print("="*80)
     print("\nType 'quit' to exit\n")
     # One fixed thread id so every question in this session shares memory
     thread_id = "cli_session"
     while True:
         try:
             question = input("\nYour question: ").strip()
             if not question or question.lower() in ['quit', 'exit', 'q']:
                 print("\nGoodbye!")
                 break
             print("\n" + "-"*80)
             answer = agent.query(question, thread_id=thread_id)
             print("\n" + "="*80)
             print("="*80)
             print(answer)
             print("="*80)
         except (KeyboardInterrupt, EOFError):
             # input() raises EOFError when stdin is closed; treat it like
             # Ctrl-C and leave cleanly instead of dumping a traceback.
             print("\n\nGoodbye!")
             break
 if __name__ == "__main__":
+    main()

toplines_rag.py ADDED Viewed

	@@ -0,0 +1,221 @@

+"""
+ToplinesRAG
+-----------
+Queries the prebuilt Pinecone toplines vectorstore and synthesizes
+a natural-language answer with citations using OpenAI.
+"""
+import os
+import re
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+from dotenv import load_dotenv
+from langchain_openai import OpenAIEmbeddings, ChatOpenAI
+from langchain_pinecone import PineconeVectorStore
+from pinecone import Pinecone
+from calendar import month_name
+load_dotenv()
+def _load_prompt_file(filename: str) -> str:
+    """Load a prompt file from the prompts directory"""
+    prompt_dir = Path(__file__).parent / "prompts"
+    prompt_path = prompt_dir / filename
+    if not prompt_path.exists():
+        raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
+    return prompt_path.read_text(encoding="utf-8")
class ToplinesRAG:
    """Answer questions about poll toplines stored in a Pinecone vector index.

    Documents are fetched by similarity search, optionally narrowed by
    metadata filters, and are either returned raw (``retrieve_raw_data``) or
    summarised by an OpenAI chat model with a sources list (``query_toplines``).
    """

    # Metadata fields accepted as filters. Any other key (for example 'topic'
    # or 'question_ids') is ignored when filters are built or matched.
    _VALID_FILTER_FIELDS = frozenset({"year", "month", "poll_date", "survey_name"})

    def __init__(
        self,
        persist_directory: str = "./toplines_vectorstores",
        index_name: Optional[str] = None,
        llm_model: str = "gpt-4-turbo",
    ):
        """Connect to Pinecone and OpenAI using settings from the environment.

        Args:
            persist_directory: Stored on the instance as a ``Path``; not
                otherwise used by the code in this class.
            index_name: Pinecone index name. Falls back to the
                PINECONE_INDEX_NAME_TOPLINES variable, then "toplines-index".
            llm_model: Chat model name used for answer synthesis.

        Raises:
            ValueError: If OPENAI_API_KEY or PINECONE_API_KEY_TOPLINES is unset.
        """
        self.persist_directory = Path(persist_directory)
        self.index_name = index_name or os.getenv("PINECONE_INDEX_NAME_TOPLINES", "toplines-index")
        # An empty PINECONE_NAMESPACE is treated the same as an unset one.
        self.namespace = os.getenv("PINECONE_NAMESPACE") or None
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        if not self.openai_api_key:
            raise ValueError("OPENAI_API_KEY not set")
        pinecone_api_key = os.getenv("PINECONE_API_KEY_TOPLINES")
        if not pinecone_api_key:
            raise ValueError("PINECONE_API_KEY_TOPLINES not set")
        self.embeddings = OpenAIEmbeddings(
            model=os.getenv("OPENAI_EMBED_MODEL", "text-embedding-3-small")
        )
        self.pc = Pinecone(api_key=pinecone_api_key)
        self.index = self.pc.Index(self.index_name)
        self.vector_store = PineconeVectorStore(
            index=self.index, embedding=self.embeddings, namespace=self.namespace
        )
        self.llm_model = llm_model
        self.llm = ChatOpenAI(
            model=self.llm_model,
            openai_api_key=self.openai_api_key,
            temperature=0
        )

    # ----------------------------------------------------------
    def _normalize_filters(self, filters: Optional[Dict[str, Any]]) -> Dict[str, str]:
        """Keep only supported, non-empty filter fields and stringify their values."""
        return {
            key: str(value)
            for key, value in (filters or {}).items()
            if key in self._VALID_FILTER_FIELDS
            and value is not None
            and str(value).strip()
        }

    # ----------------------------------------------------------
    def _build_filter(self, filters: Dict[str, Any]) -> Optional[Dict]:
        """
        Build a Pinecone metadata filter from a filters dict.

        Only supported fields with a non-empty value are used; unsupported
        fields such as 'topic' or 'question_ids' are ignored. Values are
        compared as strings.

        Returns:
            None when nothing usable remains, a single ``{field: {"$eq": v}}``
            clause for one field, or an ``{"$and": [...]}`` of such clauses.
        """
        clauses = [
            {key: {"$eq": value}}
            for key, value in self._normalize_filters(filters).items()
        ]
        if not clauses:
            return None
        return {"$and": clauses} if len(clauses) > 1 else clauses[0]

    # ----------------------------------------------------------
    def _extract_filters_from_query(self, query: str) -> Dict[str, str]:
        """
        Pull a year (20xx) and a month name out of free-text *query*.

        Matching is done on whole words so that, for example, "maybe" does not
        select May and digits inside a longer number are not read as a year.
        The standalone word "may" still selects May.
        """
        filters: Dict[str, str] = {}
        year_match = re.search(r"\b20\d{2}\b", query)
        if year_match:
            filters["year"] = year_match.group()
        # Months are tried in calendar order; the first one present wins.
        for name in month_name[1:]:
            if re.search(rf"\b{re.escape(name)}\b", query, flags=re.IGNORECASE):
                filters["month"] = name
                break
        return filters

    # ----------------------------------------------------------
    def _synthesize_answer(self, query: str, docs: List[Any]) -> str:
        """
        Generate a human-readable answer from the retrieved docs.

        Args:
            query: The user's question.
            docs: Retrieved documents; each must expose a ``metadata`` mapping.

        Returns:
            A fixed "no match" message when *docs* is empty, otherwise the LLM
            answer followed by a sources section.
        """
        if not docs:
            # No docs retrieved → truly irrelevant query
            return (
                "Your query does not match any Vanderbilt Unity Poll data. "
                "This system only provides information from those polls."
            )
        # Format retrieved documents for context
        context_snippets = "\n\n".join(
            f"Survey: {d.metadata.get('survey_name', 'Vanderbilt Unity Poll')} "
            f"({d.metadata.get('month', '')} {d.metadata.get('year', '')})\n"
            f"Question: {d.metadata.get('variable_name', '')}\n"
            f"Response: {d.metadata.get('response_label', '')}\n"
            f"Pct: {d.metadata.get('pct', 'N/A')}\n"
            f"Poll Date: {d.metadata.get('poll_date', 'N/A')}"
            for d in docs
        )
        # Load prompt from file
        prompt_template = _load_prompt_file("toplines_rag_prompt.txt")
        prompt = prompt_template.format(
            query=query,
            context_snippets=context_snippets
        )
        completion = self.llm.invoke(prompt)
        answer_text = completion.content.strip()
        # Build sources section
        sources = [
            f"- {d.metadata.get('survey_name', 'Vanderbilt Unity Poll')} "
            f"({d.metadata.get('poll_date', 'N/A')}) | Variable: {d.metadata.get('variable_name', 'N/A')}"
            for d in docs
        ]
        return f"\n--- ANSWER ---\n\n{answer_text}\n\n--- SOURCES ---\n" + "\n".join(sources)

    # ----------------------------------------------------------
    def _retrieve_docs(self, query: str, filters: Optional[Dict[str, Any]], top_k: int) -> List[Any]:
        """
        Run the similarity search shared by the public query methods.

        The filtered search is tried first. If it returns nothing and a filter
        was in effect, an unfiltered search over ``top_k * 2`` results is
        matched against the filters locally; if that also matches nothing, the
        first ``top_k`` unfiltered results are returned so the caller still
        gets the closest available data.
        """
        wanted = self._normalize_filters(filters)
        pinecone_filter = self._build_filter(wanted)
        docs = self.vector_store.similarity_search(query, k=top_k, filter=pinecone_filter)
        if docs or not pinecone_filter:
            return docs
        # Nothing matched with the filter: check whether the query matches anything at all.
        candidates = self.vector_store.similarity_search(query, k=top_k * 2)
        if not candidates:
            return docs
        matching = [
            d for d in candidates
            if all(str(d.metadata.get(key, "")) == value for key, value in wanted.items())
        ]
        # Fall back to the broader results when local filtering leaves nothing.
        return matching or candidates[:top_k]

    # ----------------------------------------------------------
    def query_toplines(self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: int = 5) -> str:
        """
        Retrieve matching toplines and return a synthesized answer with sources.

        Args:
            query: The user's question.
            filters: Optional metadata filters; unsupported keys are ignored.
            top_k: Number of documents requested from the vector store.
        """
        return self._synthesize_answer(query, self._retrieve_docs(query, filters, top_k))

    # ----------------------------------------------------------
    def retrieve_raw_data(self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: int = 5) -> Dict[str, Any]:
        """
        Retrieve raw data without LLM synthesis.
        Used by agent framework to get raw data for synthesis.
        Returns:
            Dict with 'retrieved_docs', 'num_sources', 'filters_applied'
            ('filters_applied' echoes the filters as passed by the caller)
        """
        docs = self._retrieve_docs(query, filters, top_k)
        return {
            "retrieved_docs": docs,
            "num_sources": len(docs),
            "filters_applied": filters or {}
        }

    # ----------------------------------------------------------
    def interactive_loop(self):
        """Prompt for questions on stdin until 'quit', 'exit', Ctrl-C or end of input."""
        print("ToplinesRAG ready! Type 'quit' or 'exit' to stop.\n")
        while True:
            try:
                query = input("Enter your poll question: ").strip()
            except (EOFError, KeyboardInterrupt):
                # Closed stdin or Ctrl-C: leave cleanly instead of a traceback.
                print("\nExiting ToplinesRAG. Goodbye!")
                break
            if query.lower() in ("quit", "exit"):
                print("Exiting ToplinesRAG. Goodbye!")
                break
            if not query:
                # Nothing to search for; ask again rather than querying with an empty string.
                continue
            filters = self._extract_filters_from_query(query)
            if filters:
                print(f"Using filters: {filters}")
            print("\nRetrieving answer...\n")
            answer = self.query_toplines(query, filters=filters)
            print(answer)
            print("\n" + "-"*60 + "\n")
if __name__ == "__main__":
    # Start the interactive prompt when this module is run as a script.
    ToplinesRAG().interactive_loop()

toplines_vectorstores/poll_catalog_toplines.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "2025-February": {
+    "file": "toplines_data/Vanderbilt_Unity_Poll_2025_February_toplines.json",
+    "poll_date": "2025-February",
+    "num_toplines": 41,
+    "survey_name": "Vanderbilt Unity Poll",
+    "year": "2025",
+    "month": "February"
+  },
+  "2025-June": {
+    "file": "toplines_data/Vanderbilt_Unity_Poll_2025_June_toplines.json",
+    "poll_date": "2025-June",
+    "num_toplines": 167,
+    "survey_name": "Vanderbilt Unity Poll",
+    "year": "2025",
+    "month": "June"
+  }
+}

toplines_vectorstores/toplines_index.json ADDED Viewed

	@@ -0,0 +1,2290 @@

+{
+  "2025_February_0": {
+    "topline_id": "2025_February_0",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Many Americans believe our political system needs reform. One area of possible reform involves the powers of the American presidency. Which of the following comes closest to your opinion?",
+    "response_label": "The powers of the presidency should be increased",
+    "pct": "7",
+    "count": ""
+  },
+  "2025_February_1": {
+    "topline_id": "2025_February_1",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Many Americans believe our political system needs reform. One area of possible reform involves the powers of the American presidency. Which of the following comes closest to your opinion?",
+    "response_label": "The powers of the presidency should be decreased",
+    "pct": "37",
+    "count": ""
+  },
+  "2025_February_2": {
+    "topline_id": "2025_February_2",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Many Americans believe our political system needs reform. One area of possible reform involves the powers of the American presidency. Which of the following comes closest to your opinion?",
+    "response_label": "The system needs reform, but there is no need to change the powers of the presidency",
+    "pct": "51",
+    "count": ""
+  },
+  "2025_February_3": {
+    "topline_id": "2025_February_3",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Many Americans believe our political system needs reform. One area of possible reform involves the powers of the American presidency. Which of the following comes closest to your opinion?",
+    "response_label": "The system does not need reform",
+    "pct": "5",
+    "count": ""
+  },
+  "2025_February_4": {
+    "topline_id": "2025_February_4",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Many Americans believe our political system needs reform. One area of possible reform involves the powers of the American presidency. Which of the following comes closest to your opinion?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_February_5": {
+    "topline_id": "2025_February_5",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Many Americans believe our political system needs reform. One area of possible reform involves the powers of the American presidency. Which of the following comes closest to your opinion?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_February_6": {
+    "topline_id": "2025_February_6",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When it comes to the most pressing issues facing the country today, in general, would you say ordinary Americans are:",
+    "response_label": "Mostly united",
+    "pct": "4",
+    "count": ""
+  },
+  "2025_February_7": {
+    "topline_id": "2025_February_7",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When it comes to the most pressing issues facing the country today, in general, would you say ordinary Americans are:",
+    "response_label": "Somewhat united",
+    "pct": "16",
+    "count": ""
+  },
+  "2025_February_8": {
+    "topline_id": "2025_February_8",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When it comes to the most pressing issues facing the country today, in general, would you say ordinary Americans are:",
+    "response_label": "Somewhat divided",
+    "pct": "35",
+    "count": ""
+  },
+  "2025_February_9": {
+    "topline_id": "2025_February_9",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When it comes to the most pressing issues facing the country today, in general, would you say ordinary Americans are:",
+    "response_label": "Mostly divided",
+    "pct": "45",
+    "count": ""
+  },
+  "2025_February_10": {
+    "topline_id": "2025_February_10",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When it comes to the most pressing issues facing the country today, in general, would you say ordinary Americans are:",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_February_11": {
+    "topline_id": "2025_February_11",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When it comes to the most pressing issues facing the country today, in general, would you say ordinary Americans are:",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_February_12": {
+    "topline_id": "2025_February_12",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When trying to solve important problems facing the country today, how likely, if at all, do you think it is that Americans will unite?",
+    "response_label": "Very likely",
+    "pct": "6",
+    "count": ""
+  },
+  "2025_February_13": {
+    "topline_id": "2025_February_13",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When trying to solve important problems facing the country today, how likely, if at all, do you think it is that Americans will unite?",
+    "response_label": "Somewhat likely",
+    "pct": "33",
+    "count": ""
+  },
+  "2025_February_14": {
+    "topline_id": "2025_February_14",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When trying to solve important problems facing the country today, how likely, if at all, do you think it is that Americans will unite?",
+    "response_label": "Somewhat unlikely",
+    "pct": "39",
+    "count": ""
+  },
+  "2025_February_15": {
+    "topline_id": "2025_February_15",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When trying to solve important problems facing the country today, how likely, if at all, do you think it is that Americans will unite?",
+    "response_label": "Very unlikely",
+    "pct": "22",
+    "count": ""
+  },
+  "2025_February_16": {
+    "topline_id": "2025_February_16",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When trying to solve important problems facing the country today, how likely, if at all, do you think it is that Americans will unite?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_February_17": {
+    "topline_id": "2025_February_17",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When trying to solve important problems facing the country today, how likely, if at all, do you think it is that Americans will unite?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_February_18": {
+    "topline_id": "2025_February_18",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How often do you talk about politics with someone who has an opposing political viewpoint?",
+    "response_label": "Every day",
+    "pct": "5",
+    "count": ""
+  },
+  "2025_February_19": {
+    "topline_id": "2025_February_19",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How often do you talk about politics with someone who has an opposing political viewpoint?",
+    "response_label": "Every few days",
+    "pct": "9",
+    "count": ""
+  },
+  "2025_February_20": {
+    "topline_id": "2025_February_20",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How often do you talk about politics with someone who has an opposing political viewpoint?",
+    "response_label": "Once a week",
+    "pct": "10",
+    "count": ""
+  },
+  "2025_February_21": {
+    "topline_id": "2025_February_21",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How often do you talk about politics with someone who has an opposing political viewpoint?",
+    "response_label": "A few times a month",
+    "pct": "18",
+    "count": ""
+  },
+  "2025_February_22": {
+    "topline_id": "2025_February_22",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How often do you talk about politics with someone who has an opposing political viewpoint?",
+    "response_label": "A few times a year",
+    "pct": "26",
+    "count": ""
+  },
+  "2025_February_23": {
+    "topline_id": "2025_February_23",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How often do you talk about politics with someone who has an opposing political viewpoint?",
+    "response_label": "Never \u2013 I talk about politics, but not with anyone who has an opposing political viewpoint",
+    "pct": "13",
+    "count": ""
+  },
+  "2025_February_24": {
+    "topline_id": "2025_February_24",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How often do you talk about politics with someone who has an opposing political viewpoint?",
+    "response_label": "Never - I don\u2019t talk about politics",
+    "pct": "19",
+    "count": ""
+  },
+  "2025_February_25": {
+    "topline_id": "2025_February_25",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How often do you talk about politics with someone who has an opposing political viewpoint?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_February_26": {
+    "topline_id": "2025_February_26",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How often do you talk about politics with someone who has an opposing political viewpoint?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_February_27": {
+    "topline_id": "2025_February_27",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following comes closest to your view, even if neither is exactly right?",
+    "response_label": "I would prefer my elected officials work with members of the other political party even if it means they have to compromise on some of their values and priorities.",
+    "pct": "76",
+    "count": ""
+  },
+  "2025_February_28": {
+    "topline_id": "2025_February_28",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following comes closest to your view, even if neither is exactly right?",
+    "response_label": "I would prefer my elected officials pursue their own values and priorities even if it means they are unwilling to work with members of the other political party.",
+    "pct": "24",
+    "count": ""
+  },
+  "2025_February_29": {
+    "topline_id": "2025_February_29",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following comes closest to your view, even if neither is exactly right?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_February_30": {
+    "topline_id": "2025_February_30",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following comes closest to your view, even if neither is exactly right?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_February_31": {
+    "topline_id": "2025_February_31",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, how confident are you that the political system in the United States today reflects the public\u2019s views on the pressing issues of the day?",
+    "response_label": "Very confident",
+    "pct": "7",
+    "count": ""
+  },
+  "2025_February_32": {
+    "topline_id": "2025_February_32",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, how confident are you that the political system in the United States today reflects the public\u2019s views on the pressing issues of the day?",
+    "response_label": "Somewhat confident",
+    "pct": "24",
+    "count": ""
+  },
+  "2025_February_33": {
+    "topline_id": "2025_February_33",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, how confident are you that the political system in the United States today reflects the public\u2019s views on the pressing issues of the day?",
+    "response_label": "Just a little confident",
+    "pct": "31",
+    "count": ""
+  },
+  "2025_February_34": {
+    "topline_id": "2025_February_34",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, how confident are you that the political system in the United States today reflects the public\u2019s views on the pressing issues of the day?",
+    "response_label": "Not at all confident",
+    "pct": "39",
+    "count": ""
+  },
+  "2025_February_35": {
+    "topline_id": "2025_February_35",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, how confident are you that the political system in the United States today reflects the public\u2019s views on the pressing issues of the day?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_February_36": {
+    "topline_id": "2025_February_36",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, how confident are you that the political system in the United States today reflects the public\u2019s views on the pressing issues of the day?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_February_37": {
+    "topline_id": "2025_February_37",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When it comes to political matters, do you consider yourself to be:",
+    "response_label": "More of a supporter of the Make America Great Again or MAGA movement",
+    "pct": "52",
+    "count": ""
+  },
+  "2025_February_38": {
+    "topline_id": "2025_February_38",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When it comes to political matters, do you consider yourself to be:",
+    "response_label": "More of a supporter of the Republican Party",
+    "pct": "48",
+    "count": ""
+  },
+  "2025_February_39": {
+    "topline_id": "2025_February_39",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When it comes to political matters, do you consider yourself to be:",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_February_40": {
+    "topline_id": "2025_February_40",
+    "poll_date": "2025-February",
+    "year": "2025",
+    "month": "February",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When it comes to political matters, do you consider yourself to be:",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_0": {
+    "topline_id": "2025_June_0",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following topics are you most interested in?",
+    "response_label": "Politics",
+    "pct": "11",
+    "count": ""
+  },
+  "2025_June_1": {
+    "topline_id": "2025_June_1",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following topics are you most interested in?",
+    "response_label": "Sports",
+    "pct": "13",
+    "count": ""
+  },
+  "2025_June_2": {
+    "topline_id": "2025_June_2",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following topics are you most interested in?",
+    "response_label": "Culture and entertainment",
+    "pct": "10",
+    "count": ""
+  },
+  "2025_June_3": {
+    "topline_id": "2025_June_3",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following topics are you most interested in?",
+    "response_label": "Personal finance and money",
+    "pct": "13",
+    "count": ""
+  },
+  "2025_June_4": {
+    "topline_id": "2025_June_4",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following topics are you most interested in?",
+    "response_label": "Lifestyle issues",
+    "pct": "6",
+    "count": ""
+  },
+  "2025_June_5": {
+    "topline_id": "2025_June_5",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following topics are you most interested in?",
+    "response_label": "Travel",
+    "pct": "11",
+    "count": ""
+  },
+  "2025_June_6": {
+    "topline_id": "2025_June_6",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following topics are you most interested in?",
+    "response_label": "Health and wellness",
+    "pct": "25",
+    "count": ""
+  },
+  "2025_June_7": {
+    "topline_id": "2025_June_7",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following topics are you most interested in?",
+    "response_label": "Technology and innovation",
+    "pct": "11",
+    "count": ""
+  },
+  "2025_June_8": {
+    "topline_id": "2025_June_8",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following topics are you most interested in?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_9": {
+    "topline_id": "2025_June_9",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following topics are you most interested in?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_10": {
+    "topline_id": "2025_June_10",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Compared to one year ago, is your personal financial situation today:",
+    "response_label": "Better",
+    "pct": "24",
+    "count": ""
+  },
+  "2025_June_11": {
+    "topline_id": "2025_June_11",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Compared to one year ago, is your personal financial situation today:",
+    "response_label": "About the same",
+    "pct": "49",
+    "count": ""
+  },
+  "2025_June_12": {
+    "topline_id": "2025_June_12",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Compared to one year ago, is your personal financial situation today:",
+    "response_label": "Worse",
+    "pct": "27",
+    "count": ""
+  },
+  "2025_June_13": {
+    "topline_id": "2025_June_13",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Compared to one year ago, is your personal financial situation today:",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_14": {
+    "topline_id": "2025_June_14",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Compared to one year ago, is your personal financial situation today:",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_15": {
+    "topline_id": "2025_June_15",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Compared to one year ago, is the level of economic uncertainty you face today:",
+    "response_label": "Better",
+    "pct": "15",
+    "count": ""
+  },
+  "2025_June_16": {
+    "topline_id": "2025_June_16",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Compared to one year ago, is the level of economic uncertainty you face today:",
+    "response_label": "About the same",
+    "pct": "35",
+    "count": ""
+  },
+  "2025_June_17": {
+    "topline_id": "2025_June_17",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Compared to one year ago, is the level of economic uncertainty you face today:",
+    "response_label": "Worse",
+    "pct": "50",
+    "count": ""
+  },
+  "2025_June_18": {
+    "topline_id": "2025_June_18",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Compared to one year ago, is the level of economic uncertainty you face today:",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_19": {
+    "topline_id": "2025_June_19",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Compared to one year ago, is the level of economic uncertainty you face today:",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_20": {
+    "topline_id": "2025_June_20",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Now thinking ahead, in the next year, do you think President Trump\u2019s tariff policies will make your personal finances:",
+    "response_label": "Better",
+    "pct": "14",
+    "count": ""
+  },
+  "2025_June_21": {
+    "topline_id": "2025_June_21",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Now thinking ahead, in the next year, do you think President Trump\u2019s tariff policies will make your personal finances:",
+    "response_label": "About the same",
+    "pct": "28",
+    "count": ""
+  },
+  "2025_June_22": {
+    "topline_id": "2025_June_22",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Now thinking ahead, in the next year, do you think President Trump\u2019s tariff policies will make your personal finances:",
+    "response_label": "Worse",
+    "pct": "58",
+    "count": ""
+  },
+  "2025_June_23": {
+    "topline_id": "2025_June_23",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Now thinking ahead, in the next year, do you think President Trump\u2019s tariff policies will make your personal finances:",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_24": {
+    "topline_id": "2025_June_24",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Now thinking ahead, in the next year, do you think President Trump\u2019s tariff policies will make your personal finances:",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_25": {
+    "topline_id": "2025_June_25",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How concerned, if at all, are you about the recent instability of the stock market?",
+    "response_label": "Concerned NET",
+    "pct": "33",
+    "count": ""
+  },
+  "2025_June_26": {
+    "topline_id": "2025_June_26",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How concerned, if at all, are you about the recent instability of the stock market?",
+    "response_label": "Not concerned NET",
+    "pct": "33",
+    "count": ""
+  },
+  "2025_June_27": {
+    "topline_id": "2025_June_27",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How concerned, if at all, are you about the recent instability of the stock market?",
+    "response_label": "Extremely concerned",
+    "pct": "15",
+    "count": ""
+  },
+  "2025_June_28": {
+    "topline_id": "2025_June_28",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How concerned, if at all, are you about the recent instability of the stock market?",
+    "response_label": "Very concerned",
+    "pct": "18",
+    "count": ""
+  },
+  "2025_June_29": {
+    "topline_id": "2025_June_29",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How concerned, if at all, are you about the recent instability of the stock market?",
+    "response_label": "Somewhat concerned",
+    "pct": "33",
+    "count": ""
+  },
+  "2025_June_30": {
+    "topline_id": "2025_June_30",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How concerned, if at all, are you about the recent instability of the stock market?",
+    "response_label": "Not too concerned",
+    "pct": "22",
+    "count": ""
+  },
+  "2025_June_31": {
+    "topline_id": "2025_June_31",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How concerned, if at all, are you about the recent instability of the stock market?",
+    "response_label": "Not at all concerned",
+    "pct": "11",
+    "count": ""
+  },
+  "2025_June_32": {
+    "topline_id": "2025_June_32",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How concerned, if at all, are you about the recent instability of the stock market?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_33": {
+    "topline_id": "2025_June_33",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How concerned, if at all, are you about the recent instability of the stock market?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_34": {
+    "topline_id": "2025_June_34",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, do you think artificial intelligence (AI) will make the life of you and your family:",
+    "response_label": "Better NET",
+    "pct": "29",
+    "count": ""
+  },
+  "2025_June_35": {
+    "topline_id": "2025_June_35",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, do you think artificial intelligence (AI) will make the life of you and your family:",
+    "response_label": "Worse NET",
+    "pct": "36",
+    "count": ""
+  },
+  "2025_June_36": {
+    "topline_id": "2025_June_36",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, do you think artificial intelligence (AI) will make the life of you and your family:",
+    "response_label": "Much better",
+    "pct": "4",
+    "count": ""
+  },
+  "2025_June_37": {
+    "topline_id": "2025_June_37",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, do you think artificial intelligence (AI) will make the life of you and your family:",
+    "response_label": "Somewhat better",
+    "pct": "25",
+    "count": ""
+  },
+  "2025_June_38": {
+    "topline_id": "2025_June_38",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, do you think artificial intelligence (AI) will make the life of you and your family:",
+    "response_label": "Neither better nor worse",
+    "pct": "34",
+    "count": ""
+  },
+  "2025_June_39": {
+    "topline_id": "2025_June_39",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, do you think artificial intelligence (AI) will make the life of you and your family:",
+    "response_label": "Somewhat worse",
+    "pct": "22",
+    "count": ""
+  },
+  "2025_June_40": {
+    "topline_id": "2025_June_40",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, do you think artificial intelligence (AI) will make the life of you and your family:",
+    "response_label": "Much worse",
+    "pct": "14",
+    "count": ""
+  },
+  "2025_June_41": {
+    "topline_id": "2025_June_41",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, do you think artificial intelligence (AI) will make the life of you and your family:",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_42": {
+    "topline_id": "2025_June_42",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In general, do you think artificial intelligence (AI) will make the life of you and your family:",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_43": {
+    "topline_id": "2025_June_43",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following emotions best describes the way you currently feel about the actions the Trump administration has taken so far during its term?",
+    "response_label": "Satisfied NET",
+    "pct": "31",
+    "count": ""
+  },
+  "2025_June_44": {
+    "topline_id": "2025_June_44",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following emotions best describes the way you currently feel about the actions the Trump administration has taken so far during its term?",
+    "response_label": "Dissatisfied NET",
+    "pct": "55",
+    "count": ""
+  },
+  "2025_June_45": {
+    "topline_id": "2025_June_45",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following emotions best describes the way you currently feel about the actions the Trump administration has taken so far during its term?",
+    "response_label": "Enthusiastic",
+    "pct": "13",
+    "count": ""
+  },
+  "2025_June_46": {
+    "topline_id": "2025_June_46",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following emotions best describes the way you currently feel about the actions the Trump administration has taken so far during its term?",
+    "response_label": "Satisfied, but not enthusiastic",
+    "pct": "18",
+    "count": ""
+  },
+  "2025_June_47": {
+    "topline_id": "2025_June_47",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following emotions best describes the way you currently feel about the actions the Trump administration has taken so far during its term?",
+    "response_label": "Don\u2019t have a reaction either way",
+    "pct": "14",
+    "count": ""
+  },
+  "2025_June_48": {
+    "topline_id": "2025_June_48",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following emotions best describes the way you currently feel about the actions the Trump administration has taken so far during its term?",
+    "response_label": "Dissatisfied, but not angry",
+    "pct": "22",
+    "count": ""
+  },
+  "2025_June_49": {
+    "topline_id": "2025_June_49",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following emotions best describes the way you currently feel about the actions the Trump administration has taken so far during its term?",
+    "response_label": "Angry",
+    "pct": "33",
+    "count": ""
+  },
+  "2025_June_50": {
+    "topline_id": "2025_June_50",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following emotions best describes the way you currently feel about the actions the Trump administration has taken so far during its term?",
+    "response_label": "Don\u2019t know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_51": {
+    "topline_id": "2025_June_51",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following emotions best describes the way you currently feel about the actions the Trump administration has taken so far during its term?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_52": {
+    "topline_id": "2025_June_52",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following emotions best describes the way you currently feel about the actions the Trump administration has taken so far during its term?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_53": {
+    "topline_id": "2025_June_53",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the job that Joe Biden did as president?",
+    "response_label": "Strong/Somewhat approve NET",
+    "pct": "40",
+    "count": ""
+  },
+  "2025_June_54": {
+    "topline_id": "2025_June_54",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the job that Joe Biden did as president?",
+    "response_label": "Somewhat/Strong disapprove NET",
+    "pct": "59",
+    "count": ""
+  },
+  "2025_June_55": {
+    "topline_id": "2025_June_55",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the job that Joe Biden did as president?",
+    "response_label": "Strongly approve",
+    "pct": "9",
+    "count": ""
+  },
+  "2025_June_56": {
+    "topline_id": "2025_June_56",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the job that Joe Biden did as president?",
+    "response_label": "Somewhat approve",
+    "pct": "32",
+    "count": ""
+  },
+  "2025_June_57": {
+    "topline_id": "2025_June_57",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the job that Joe Biden did as president?",
+    "response_label": "Somewhat disapprove",
+    "pct": "22",
+    "count": ""
+  },
+  "2025_June_58": {
+    "topline_id": "2025_June_58",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the job that Joe Biden did as president?",
+    "response_label": "Strongly disapprove",
+    "pct": "38",
+    "count": ""
+  },
+  "2025_June_59": {
+    "topline_id": "2025_June_59",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the job that Joe Biden did as president?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_60": {
+    "topline_id": "2025_June_60",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the job that Joe Biden did as president?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_61": {
+    "topline_id": "2025_June_61",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries without a formal hearing before a judge in an immigration court?",
+    "response_label": "Strong/Somewhat support NET",
+    "pct": "39",
+    "count": ""
+  },
+  "2025_June_62": {
+    "topline_id": "2025_June_62",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries without a formal hearing before a judge in an immigration court?",
+    "response_label": "Somewhat/Strong oppose NET",
+    "pct": "46",
+    "count": ""
+  },
+  "2025_June_63": {
+    "topline_id": "2025_June_63",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries without a formal hearing before a judge in an immigration court?",
+    "response_label": "Strongly support",
+    "pct": "21",
+    "count": ""
+  },
+  "2025_June_64": {
+    "topline_id": "2025_June_64",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries without a formal hearing before a judge in an immigration court?",
+    "response_label": "Somewhat support",
+    "pct": "18",
+    "count": ""
+  },
+  "2025_June_65": {
+    "topline_id": "2025_June_65",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries without a formal hearing before a judge in an immigration court?",
+    "response_label": "Neither support nor oppose",
+    "pct": "15",
+    "count": ""
+  },
+  "2025_June_66": {
+    "topline_id": "2025_June_66",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries without a formal hearing before a judge in an immigration court?",
+    "response_label": "Somewhat oppose",
+    "pct": "11",
+    "count": ""
+  },
+  "2025_June_67": {
+    "topline_id": "2025_June_67",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries without a formal hearing before a judge in an immigration court?",
+    "response_label": "Strongly oppose",
+    "pct": "35",
+    "count": ""
+  },
+  "2025_June_68": {
+    "topline_id": "2025_June_68",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries without a formal hearing before a judge in an immigration court?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_69": {
+    "topline_id": "2025_June_69",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries without a formal hearing before a judge in an immigration court?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_70": {
+    "topline_id": "2025_June_70",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries?",
+    "response_label": "Strong/Somewhat support NET",
+    "pct": "51",
+    "count": ""
+  },
+  "2025_June_71": {
+    "topline_id": "2025_June_71",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries?",
+    "response_label": "Somewhat/Strong oppose NET",
+    "pct": "31",
+    "count": ""
+  },
+  "2025_June_72": {
+    "topline_id": "2025_June_72",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries?",
+    "response_label": "Strongly support",
+    "pct": "27",
+    "count": ""
+  },
+  "2025_June_73": {
+    "topline_id": "2025_June_73",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries?",
+    "response_label": "Somewhat support",
+    "pct": "24",
+    "count": ""
+  },
+  "2025_June_74": {
+    "topline_id": "2025_June_74",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries?",
+    "response_label": "Neither support nor oppose",
+    "pct": "18",
+    "count": ""
+  },
+  "2025_June_75": {
+    "topline_id": "2025_June_75",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries?",
+    "response_label": "Somewhat oppose",
+    "pct": "14",
+    "count": ""
+  },
+  "2025_June_76": {
+    "topline_id": "2025_June_76",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries?",
+    "response_label": "Strongly oppose",
+    "pct": "17",
+    "count": ""
+  },
+  "2025_June_77": {
+    "topline_id": "2025_June_77",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_78": {
+    "topline_id": "2025_June_78",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose deporting individuals who are living in the United States illegally back to their home countries?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_79": {
+    "topline_id": "2025_June_79",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How important is a college education for a young person to succeed in the world today?",
+    "response_label": "Important NET",
+    "pct": "76",
+    "count": ""
+  },
+  "2025_June_80": {
+    "topline_id": "2025_June_80",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How important is a college education for a young person to succeed in the world today?",
+    "response_label": "Not important NET",
+    "pct": "24",
+    "count": ""
+  },
+  "2025_June_81": {
+    "topline_id": "2025_June_81",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How important is a college education for a young person to succeed in the world today?",
+    "response_label": "Very important",
+    "pct": "31",
+    "count": ""
+  },
+  "2025_June_82": {
+    "topline_id": "2025_June_82",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How important is a college education for a young person to succeed in the world today?",
+    "response_label": "Somewhat important",
+    "pct": "46",
+    "count": ""
+  },
+  "2025_June_83": {
+    "topline_id": "2025_June_83",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How important is a college education for a young person to succeed in the world today?",
+    "response_label": "Not too important",
+    "pct": "19",
+    "count": ""
+  },
+  "2025_June_84": {
+    "topline_id": "2025_June_84",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How important is a college education for a young person to succeed in the world today?",
+    "response_label": "Not at all important",
+    "pct": "4",
+    "count": ""
+  },
+  "2025_June_85": {
+    "topline_id": "2025_June_85",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How important is a college education for a young person to succeed in the world today?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_86": {
+    "topline_id": "2025_June_86",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How important is a college education for a young person to succeed in the world today?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_87": {
+    "topline_id": "2025_June_87",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of cutting resources to and ultimately eliminating the Department of Education?",
+    "response_label": "Strong/Somewhat approve NET",
+    "pct": "32",
+    "count": ""
+  },
+  "2025_June_88": {
+    "topline_id": "2025_June_88",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of cutting resources to and ultimately eliminating the Department of Education?",
+    "response_label": "Somewhat/Strong disapprove NET",
+    "pct": "68",
+    "count": ""
+  },
+  "2025_June_89": {
+    "topline_id": "2025_June_89",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of cutting resources to and ultimately eliminating the Department of Education?",
+    "response_label": "Strongly approve",
+    "pct": "15",
+    "count": ""
+  },
+  "2025_June_90": {
+    "topline_id": "2025_June_90",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of cutting resources to and ultimately eliminating the Department of Education?",
+    "response_label": "Somewhat approve",
+    "pct": "17",
+    "count": ""
+  },
+  "2025_June_91": {
+    "topline_id": "2025_June_91",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of cutting resources to and ultimately eliminating the Department of Education?",
+    "response_label": "Somewhat disapprove",
+    "pct": "25",
+    "count": ""
+  },
+  "2025_June_92": {
+    "topline_id": "2025_June_92",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of cutting resources to and ultimately eliminating the Department of Education?",
+    "response_label": "Strongly disapprove",
+    "pct": "43",
+    "count": ""
+  },
+  "2025_June_93": {
+    "topline_id": "2025_June_93",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of cutting resources to and ultimately eliminating the Department of Education?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_94": {
+    "topline_id": "2025_June_94",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of cutting resources to and ultimately eliminating the Department of Education?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_95": {
+    "topline_id": "2025_June_95",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the way Donald Trump is handling issues related to colleges and universities?",
+    "response_label": "Strong/Somewhat approve NET",
+    "pct": "39",
+    "count": ""
+  },
+  "2025_June_96": {
+    "topline_id": "2025_June_96",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the way Donald Trump is handling issues related to colleges and universities?",
+    "response_label": "Somewhat/Strong disapprove NET",
+    "pct": "61",
+    "count": ""
+  },
+  "2025_June_97": {
+    "topline_id": "2025_June_97",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the way Donald Trump is handling issues related to colleges and universities?",
+    "response_label": "Strongly approve",
+    "pct": "14",
+    "count": ""
+  },
+  "2025_June_98": {
+    "topline_id": "2025_June_98",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the way Donald Trump is handling issues related to colleges and universities?",
+    "response_label": "Somewhat approve",
+    "pct": "25",
+    "count": ""
+  },
+  "2025_June_99": {
+    "topline_id": "2025_June_99",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the way Donald Trump is handling issues related to colleges and universities?",
+    "response_label": "Somewhat disapprove",
+    "pct": "24",
+    "count": ""
+  },
+  "2025_June_100": {
+    "topline_id": "2025_June_100",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the way Donald Trump is handling issues related to colleges and universities?",
+    "response_label": "Strongly disapprove",
+    "pct": "37",
+    "count": ""
+  },
+  "2025_June_101": {
+    "topline_id": "2025_June_101",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the way Donald Trump is handling issues related to colleges and universities?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_102": {
+    "topline_id": "2025_June_102",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you approve or disapprove of the way Donald Trump is handling issues related to colleges and universities?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_103": {
+    "topline_id": "2025_June_103",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in public colleges and universities?",
+    "response_label": "Confident NET",
+    "pct": "46",
+    "count": ""
+  },
+  "2025_June_104": {
+    "topline_id": "2025_June_104",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in public colleges and universities?",
+    "response_label": "Not confident NET",
+    "pct": "15",
+    "count": ""
+  },
+  "2025_June_105": {
+    "topline_id": "2025_June_105",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in public colleges and universities?",
+    "response_label": "A great deal",
+    "pct": "12",
+    "count": ""
+  },
+  "2025_June_106": {
+    "topline_id": "2025_June_106",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in public colleges and universities?",
+    "response_label": "Quite a lot",
+    "pct": "34",
+    "count": ""
+  },
+  "2025_June_107": {
+    "topline_id": "2025_June_107",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in public colleges and universities?",
+    "response_label": "Some",
+    "pct": "39",
+    "count": ""
+  },
+  "2025_June_108": {
+    "topline_id": "2025_June_108",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in public colleges and universities?",
+    "response_label": "Very little",
+    "pct": "9",
+    "count": ""
+  },
+  "2025_June_109": {
+    "topline_id": "2025_June_109",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in public colleges and universities?",
+    "response_label": "None at all",
+    "pct": "6",
+    "count": ""
+  },
+  "2025_June_110": {
+    "topline_id": "2025_June_110",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in public colleges and universities?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_111": {
+    "topline_id": "2025_June_111",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in public colleges and universities?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_112": {
+    "topline_id": "2025_June_112",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in private colleges and universities?",
+    "response_label": "Confident NET",
+    "pct": "30",
+    "count": ""
+  },
+  "2025_June_113": {
+    "topline_id": "2025_June_113",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in private colleges and universities?",
+    "response_label": "Not confident NET",
+    "pct": "26",
+    "count": ""
+  },
+  "2025_June_114": {
+    "topline_id": "2025_June_114",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in private colleges and universities?",
+    "response_label": "A great deal",
+    "pct": "11",
+    "count": ""
+  },
+  "2025_June_115": {
+    "topline_id": "2025_June_115",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in private colleges and universities?",
+    "response_label": "Quite a lot",
+    "pct": "20",
+    "count": ""
+  },
+  "2025_June_116": {
+    "topline_id": "2025_June_116",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in private colleges and universities?",
+    "response_label": "Some",
+    "pct": "44",
+    "count": ""
+  },
+  "2025_June_117": {
+    "topline_id": "2025_June_117",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in private colleges and universities?",
+    "response_label": "Very little",
+    "pct": "20",
+    "count": ""
+  },
+  "2025_June_118": {
+    "topline_id": "2025_June_118",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in private colleges and universities?",
+    "response_label": "None at all",
+    "pct": "6",
+    "count": ""
+  },
+  "2025_June_119": {
+    "topline_id": "2025_June_119",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in private colleges and universities?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_120": {
+    "topline_id": "2025_June_120",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much confidence do you have in private colleges and universities?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_121": {
+    "topline_id": "2025_June_121",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose the U.S. government monitoring the social media activity of current and prospective foreign students at U.S. colleges and universities?",
+    "response_label": "Strong/Somewhat support NET",
+    "pct": "33",
+    "count": ""
+  },
+  "2025_June_122": {
+    "topline_id": "2025_June_122",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose the U.S. government monitoring the social media activity of current and prospective foreign students at U.S. colleges and universities?",
+    "response_label": "Somewhat/Strong oppose NET",
+    "pct": "40",
+    "count": ""
+  },
+  "2025_June_123": {
+    "topline_id": "2025_June_123",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose the U.S. government monitoring the social media activity of current and prospective foreign students at U.S. colleges and universities?",
+    "response_label": "Strongly support",
+    "pct": "14",
+    "count": ""
+  },
+  "2025_June_124": {
+    "topline_id": "2025_June_124",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose the U.S. government monitoring the social media activity of current and prospective foreign students at U.S. colleges and universities?",
+    "response_label": "Somewhat support",
+    "pct": "19",
+    "count": ""
+  },
+  "2025_June_125": {
+    "topline_id": "2025_June_125",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose the U.S. government monitoring the social media activity of current and prospective foreign students at U.S. colleges and universities?",
+    "response_label": "Neither support nor oppose",
+    "pct": "26",
+    "count": ""
+  },
+  "2025_June_126": {
+    "topline_id": "2025_June_126",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose the U.S. government monitoring the social media activity of current and prospective foreign students at U.S. colleges and universities?",
+    "response_label": "Somewhat oppose",
+    "pct": "14",
+    "count": ""
+  },
+  "2025_June_127": {
+    "topline_id": "2025_June_127",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose the U.S. government monitoring the social media activity of current and prospective foreign students at U.S. colleges and universities?",
+    "response_label": "Strongly oppose",
+    "pct": "27",
+    "count": ""
+  },
+  "2025_June_128": {
+    "topline_id": "2025_June_128",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose the U.S. government monitoring the social media activity of current and prospective foreign students at U.S. colleges and universities?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_129": {
+    "topline_id": "2025_June_129",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "How much do you support or oppose the U.S. government monitoring the social media activity of current and prospective foreign students at U.S. colleges and universities?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_130": {
+    "topline_id": "2025_June_130",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following comes closer to your own views, even if neither is exactly right?",
+    "response_label": "Foreign students who graduate from colleges and universities should be granted a legal green card so they can stay and work in the United States after they graduate.",
+    "pct": "59",
+    "count": ""
+  },
+  "2025_June_131": {
+    "topline_id": "2025_June_131",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following comes closer to your own views, even if neither is exactly right?",
+    "response_label": "Foreign students who graduate but fail to secure a job that will sponsor their visa should be sent back to their home countries.",
+    "pct": "40",
+    "count": ""
+  },
+  "2025_June_132": {
+    "topline_id": "2025_June_132",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following comes closer to your own views, even if neither is exactly right?",
+    "response_label": "Don\u2019t know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_133": {
+    "topline_id": "2025_June_133",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following comes closer to your own views, even if neither is exactly right?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_134": {
+    "topline_id": "2025_June_134",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following comes closer to your own views, even if neither is exactly right?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_135": {
+    "topline_id": "2025_June_135",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "The Trump administration announced it is moving to \"aggressively revoke\" visas of current international students from China who are attending U.S. colleges and universities. How much do you support or",
+    "response_label": "Strong/Somewhat support NET",
+    "pct": "31",
+    "count": ""
+  },
+  "2025_June_136": {
+    "topline_id": "2025_June_136",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "The Trump administration announced it is moving to \"aggressively revoke\" visas of current international students from China who are attending U.S. colleges and universities. How much do you support or",
+    "response_label": "Somewhat/Strong oppose NET",
+    "pct": "46",
+    "count": ""
+  },
+  "2025_June_137": {
+    "topline_id": "2025_June_137",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "The Trump administration announced it is moving to \"aggressively revoke\" visas of current international students from China who are attending U.S. colleges and universities. How much do you support or",
+    "response_label": "Strongly support",
+    "pct": "16",
+    "count": ""
+  },
+  "2025_June_138": {
+    "topline_id": "2025_June_138",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "The Trump administration announced it is moving to \"aggressively revoke\" visas of current international students from China who are attending U.S. colleges and universities. How much do you support or",
+    "response_label": "Somewhat support",
+    "pct": "15",
+    "count": ""
+  },
+  "2025_June_139": {
+    "topline_id": "2025_June_139",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "The Trump administration announced it is moving to \"aggressively revoke\" visas of current international students from China who are attending U.S. colleges and universities. How much do you support or",
+    "response_label": "Neither support nor oppose",
+    "pct": "23",
+    "count": ""
+  },
+  "2025_June_140": {
+    "topline_id": "2025_June_140",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "The Trump administration announced it is moving to \"aggressively revoke\" visas of current international students from China who are attending U.S. colleges and universities. How much do you support or",
+    "response_label": "Somewhat oppose",
+    "pct": "18",
+    "count": ""
+  },
+  "2025_June_141": {
+    "topline_id": "2025_June_141",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "The Trump administration announced it is moving to \"aggressively revoke\" visas of current international students from China who are attending U.S. colleges and universities. How much do you support or",
+    "response_label": "Strongly oppose",
+    "pct": "28",
+    "count": ""
+  },
+  "2025_June_142": {
+    "topline_id": "2025_June_142",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "The Trump administration announced it is moving to \"aggressively revoke\" visas of current international students from China who are attending U.S. colleges and universities. How much do you support or",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_143": {
+    "topline_id": "2025_June_143",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "The Trump administration announced it is moving to \"aggressively revoke\" visas of current international students from China who are attending U.S. colleges and universities. How much do you support or",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_144": {
+    "topline_id": "2025_June_144",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you think the President of the United States should be able to determine if colleges and universities can enroll foreign students?",
+    "response_label": "Yes",
+    "pct": "22",
+    "count": ""
+  },
+  "2025_June_145": {
+    "topline_id": "2025_June_145",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you think the President of the United States should be able to determine if colleges and universities can enroll foreign students?",
+    "response_label": "No",
+    "pct": "61",
+    "count": ""
+  },
+  "2025_June_146": {
+    "topline_id": "2025_June_146",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you think the President of the United States should be able to determine if colleges and universities can enroll foreign students?",
+    "response_label": "Don\u2019t know",
+    "pct": "17",
+    "count": ""
+  },
+  "2025_June_147": {
+    "topline_id": "2025_June_147",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you think the President of the United States should be able to determine if colleges and universities can enroll foreign students?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_148": {
+    "topline_id": "2025_June_148",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Do you think the President of the United States should be able to determine if colleges and universities can enroll foreign students?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_149": {
+    "topline_id": "2025_June_149",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following comes closer to your own views, even if neither is exactly right?",
+    "response_label": "The president should have the authority to ignore court rulings he disagrees with",
+    "pct": "17",
+    "count": ""
+  },
+  "2025_June_150": {
+    "topline_id": "2025_June_150",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following comes closer to your own views, even if neither is exactly right?",
+    "response_label": "The president should be required to follow court rulings even if he disagrees with them",
+    "pct": "83",
+    "count": ""
+  },
+  "2025_June_151": {
+    "topline_id": "2025_June_151",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following comes closer to your own views, even if neither is exactly right?",
+    "response_label": "Don\u2019t know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_152": {
+    "topline_id": "2025_June_152",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following comes closer to your own views, even if neither is exactly right?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_153": {
+    "topline_id": "2025_June_153",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which of the following comes closer to your own views, even if neither is exactly right?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_154": {
+    "topline_id": "2025_June_154",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When it comes to political matters, do you consider yourself to be:",
+    "response_label": "More of a supporter of the Make America Great Again or MAGA movement",
+    "pct": "44",
+    "count": ""
+  },
+  "2025_June_155": {
+    "topline_id": "2025_June_155",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When it comes to political matters, do you consider yourself to be:",
+    "response_label": "More of a supporter of the Republican Party",
+    "pct": "56",
+    "count": ""
+  },
+  "2025_June_156": {
+    "topline_id": "2025_June_156",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When it comes to political matters, do you consider yourself to be:",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_157": {
+    "topline_id": "2025_June_157",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "When it comes to political matters, do you consider yourself to be:",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_158": {
+    "topline_id": "2025_June_158",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In talking to people about elections, we often find that a lot of people were not able to vote because they weren\u2019t registered, they were sick, or they just didn\u2019t have time. How about you, did you ha",
+    "response_label": "Yes, did vote",
+    "pct": "71",
+    "count": ""
+  },
+  "2025_June_159": {
+    "topline_id": "2025_June_159",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In talking to people about elections, we often find that a lot of people were not able to vote because they weren\u2019t registered, they were sick, or they just didn\u2019t have time. How about you, did you ha",
+    "response_label": "No, did not vote",
+    "pct": "29",
+    "count": ""
+  },
+  "2025_June_160": {
+    "topline_id": "2025_June_160",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In talking to people about elections, we often find that a lot of people were not able to vote because they weren\u2019t registered, they were sick, or they just didn\u2019t have time. How about you, did you ha",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_161": {
+    "topline_id": "2025_June_161",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "In talking to people about elections, we often find that a lot of people were not able to vote because they weren\u2019t registered, they were sick, or they just didn\u2019t have time. How about you, did you ha",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_162": {
+    "topline_id": "2025_June_162",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which presidential candidate did you vote for in 2024?",
+    "response_label": "I voted for Kamala Harris",
+    "pct": "48",
+    "count": ""
+  },
+  "2025_June_163": {
+    "topline_id": "2025_June_163",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which presidential candidate did you vote for in 2024?",
+    "response_label": "I voted for Donald Trump",
+    "pct": "45",
+    "count": ""
+  },
+  "2025_June_164": {
+    "topline_id": "2025_June_164",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which presidential candidate did you vote for in 2024?",
+    "response_label": "I voted for another candidate",
+    "pct": "7",
+    "count": ""
+  },
+  "2025_June_165": {
+    "topline_id": "2025_June_165",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which presidential candidate did you vote for in 2024?",
+    "response_label": "Don't know",
+    "pct": "0",
+    "count": ""
+  },
+  "2025_June_166": {
+    "topline_id": "2025_June_166",
+    "poll_date": "2025-June",
+    "year": "2025",
+    "month": "June",
+    "survey_name": "Vanderbilt Unity Poll",
+    "variable_name": "Which presidential candidate did you vote for in 2024?",
+    "response_label": "Refused",
+    "pct": "0",
+    "count": ""
+  }
+}