Spaces:

DataQuests
/

DeepCritical

Running

App Files Files Community

Joseph Pollack committed on 19 days ago

Commit

188495c

1 Parent(s): dda90bf

adds docs , ci hf spaces

Browse files

Files changed (29) hide show

.github/scripts/deploy_to_hf_space.py +235 -0
.github/workflows/deploy-hf-space.yml +44 -0
dev/__init__.py +1 -0
docs/LICENSE.md +1 -0
examples/README.md +0 -184
examples/embeddings_demo/run_embeddings.py +0 -104
examples/full_stack_demo/run_full.py +0 -236
examples/hypothesis_demo/run_hypothesis.py +0 -142
examples/modal_demo/run_analysis.py +0 -64
examples/modal_demo/test_code_execution.py +0 -169
examples/modal_demo/verify_sandbox.py +0 -101
examples/orchestrator_demo/run_agent.py +0 -115
examples/orchestrator_demo/run_magentic.py +0 -96
examples/rate_limiting_demo.py +0 -82
examples/search_demo/run_search.py +0 -67
src/middleware/state_machine.py +1 -0
src/tools/searchxng_web_search.py +1 -0
src/tools/serper_web_search.py +1 -0
src/tools/vendored/crawl_website.py +1 -0
src/tools/vendored/searchxng_client.py +1 -0
src/tools/vendored/serper_client.py +1 -0
src/tools/vendored/web_search_core.py +1 -0
src/tools/web_search_factory.py +1 -0
src/utils/markdown.css +1 -0
src/utils/md_to_pdf.py +1 -0
src/utils/report_generator.py +1 -0
tests/unit/middleware/test_budget_tracker_phase7.py +1 -0
tests/unit/middleware/test_state_machine.py +1 -0
tests/unit/middleware/test_workflow_manager.py +1 -0

.github/scripts/deploy_to_hf_space.py ADDED Viewed

	@@ -0,0 +1,235 @@

+"""Deploy repository to Hugging Face Space, excluding unnecessary files."""
+import os
+import shutil
+from pathlib import Path
+from typing import Set
+from huggingface_hub import HfApi, Repository
+def get_excluded_dirs() -> Set[str]:
+    """Return the directory names that are left out of the deployment.
+    The names are bare directory names (no paths); a fresh set is built on
+    every call.
+    """
+    # Project content that is not shipped to the Space.
+    # "tests" and "examples" are optional - they can be included if desired.
+    project_dirs = [
+        "docs",
+        "dev",
+        "folder",
+        "site",
+        "tests",
+        "examples",
+        "reference_repos",
+        "burner_docs",
+        "chroma_db",
+        "logs",
+    ]
+    # Version control, CI and editor metadata
+    tooling_dirs = [".git", ".github", ".cursor"]
+    # Caches written by Python tooling
+    cache_dirs = ["__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache"]
+    # Virtual environments and installed packages
+    environment_dirs = [".venv", "venv", "env", "ENV", "node_modules"]
+    # Build and report output
+    output_dirs = ["build", "dist", ".eggs", "htmlcov"]
+    return set(project_dirs + tooling_dirs + cache_dirs + environment_dirs + output_dirs)
+def get_excluded_files() -> Set[str]:
+    """Return the file names and wildcard patterns left out of the deployment.
+    Entries containing "*" are patterns; all other entries are exact file names.
+    A fresh set is built on every call.
+    """
+    # Files named exactly as listed
+    exact_names = [
+        ".pre-commit-config.yaml",
+        "mkdocs.yml",
+        "uv.lock",
+        "AGENTS.txt",
+        "CONTRIBUTING.md",
+        ".env",
+        ".env.local",
+        ".DS_Store",
+        "Thumbs.db",
+        ".coverage",
+        "coverage.xml",
+    ]
+    # Wildcard patterns
+    wildcard_patterns = ["*.local", "*.log"]
+    return set(exact_names) | set(wildcard_patterns)
+def should_exclude(path: Path, excluded_dirs: Set[str], excluded_files: Set[str]) -> bool:
+    """Check if a path should be excluded from deployment.
+    Args:
+        path: Path to test. It is checked on disk with is_dir()/is_file(), so a
+            path that does not exist is only excluded through its parents.
+        excluded_dirs: Directory names to exclude, at any depth.
+        excluded_files: File names to exclude. Entries containing "*" are
+            shell-style wildcard patterns (e.g. "*.log"); all other entries
+            must match the file name exactly.
+    Returns:
+        True if the path lies inside an excluded directory, is itself an
+        excluded directory, or is a file whose name matches an excluded name
+        or pattern; False otherwise.
+    """
+    from fnmatch import fnmatchcase
+    # Anything inside an excluded directory is excluded, whatever its own name is
+    if any(parent.name in excluded_dirs for parent in path.parents):
+        return True
+    # The path itself is an excluded directory
+    if path.is_dir() and path.name in excluded_dirs:
+        return True
+    if path.is_file():
+        # Exact file name match
+        if path.name in excluded_files:
+            return True
+        # Wildcard match. fnmatchcase handles "*" anywhere in the pattern, not
+        # just at the start, and is case-sensitive on every platform.
+        return any(
+            fnmatchcase(path.name, pattern)
+            for pattern in excluded_files
+            if "*" in pattern
+        )
+    return False
+def deploy_to_hf_space() -> None:
+    """Deploy repository to Hugging Face Space.
+    Supports both user and organization Spaces:
+    - User Space: username/space-name
+    - Organization Space: organization-name/space-name
+    Works with both classic tokens and fine-grained tokens.
+    Configuration is read from the environment variables HF_TOKEN, HF_USERNAME
+    (user or organization name) and HF_SPACE_NAME. The Space is cloned into
+    ./hf_space (and created first if the clone fails), everything in the clone
+    except .git is removed, the non-excluded files of the current working
+    directory are copied in, and the result is committed and pushed.
+    Raises:
+        ValueError: If any of the three environment variables is missing or empty.
+        Exception: Errors from creating or cloning the Space, and any commit or
+            push error other than "nothing to commit", are propagated.
+    """
+    # Get configuration from environment variables
+    hf_token = os.getenv("HF_TOKEN")
+    hf_username = os.getenv("HF_USERNAME")  # Can be username or organization name
+    space_name = os.getenv("HF_SPACE_NAME")
+    if not all([hf_token, hf_username, space_name]):
+        raise ValueError(
+            "Missing required environment variables: HF_TOKEN, HF_USERNAME, HF_SPACE_NAME"
+        )
+    # HF_USERNAME can be either a username or organization name
+    # Format: {username|organization}/{space_name}
+    repo_id = f"{hf_username}/{space_name}"
+    local_dir = "hf_space"
+    commit_message = "Deploy to Hugging Face Space [skip ci]"
+    print(f"🚀 Deploying to Hugging Face Space: {repo_id}")
+    # Initialize HF API
+    api = HfApi(token=hf_token)
+    # Clone or create repository
+    try:
+        repo = Repository(
+            local_dir=local_dir,
+            clone_from=repo_id,
+            token=hf_token,
+            repo_type="space",
+        )
+        print(f"✅ Cloned existing Space: {repo_id}")
+    except Exception as e:
+        print(f"⚠️  Could not clone Space (may not exist yet): {e}")
+        # Create the Space under the full "{owner}/{name}" id. A bare space name
+        # would be created in the token owner's namespace, which is the wrong
+        # place for an organization Space and makes the clone below fail.
+        api.create_repo(
+            repo_id=repo_id,
+            repo_type="space",
+            space_sdk="gradio",
+            token=hf_token,
+            exist_ok=True,
+        )
+        repo = Repository(
+            local_dir=local_dir,
+            clone_from=repo_id,
+            token=hf_token,
+            repo_type="space",
+        )
+        print(f"✅ Created new Space: {repo_id}")
+    # Get exclusion sets
+    excluded_dirs = get_excluded_dirs()
+    excluded_files = get_excluded_files()
+    # Remove all existing files in HF Space (except .git)
+    print("🧹 Cleaning existing files...")
+    for item in Path(local_dir).iterdir():
+        if item.name == ".git":
+            continue
+        if item.is_dir():
+            shutil.rmtree(item)
+        else:
+            item.unlink()
+    # Copy files from repository root
+    print("📦 Copying files...")
+    repo_root = Path(".")
+    files_copied = 0
+    dirs_copied = 0
+    for item in repo_root.rglob("*"):
+        # Skip if in .git directory
+        if ".git" in item.parts:
+            continue
+        # Skip if should be excluded
+        if should_exclude(item, excluded_dirs, excluded_files):
+            continue
+        # Calculate relative path
+        try:
+            rel_path = item.relative_to(repo_root)
+        except ValueError:
+            # Item is outside repo root, skip
+            continue
+        # Never copy the Space checkout into itself: it lives inside repo_root and
+        # is filled while this walk is still running, so without this guard its
+        # contents could be copied again to hf_space/hf_space/...
+        if rel_path.parts and rel_path.parts[0] == local_dir:
+            continue
+        # Skip if in excluded directory
+        if any(part in excluded_dirs for part in rel_path.parts):
+            continue
+        # Destination path
+        dest_path = Path(local_dir) / rel_path
+        # Create parent directories
+        dest_path.parent.mkdir(parents=True, exist_ok=True)
+        # Copy file or directory
+        if item.is_file():
+            shutil.copy2(item, dest_path)
+            files_copied += 1
+        elif item.is_dir():
+            # Directory will be created by parent mkdir, but we track it
+            dirs_copied += 1
+    print(f"✅ Copied {files_copied} files and {dirs_copied} directories")
+    # Commit and push changes
+    print("💾 Committing changes...")
+    repo.git_add(auto_lfs_track=True)
+    # Check if there are changes to commit
+    try:
+        # is_repo_clean may not be available in all versions
+        is_clean = hasattr(repo, "is_repo_clean") and repo.is_repo_clean()
+    except Exception:
+        # Best effort only: if the check itself fails, fall through and let the
+        # commit attempt decide whether there is anything to deploy
+        is_clean = False
+    if is_clean:
+        print("ℹ️  No changes to commit (repository is up to date)")
+    else:
+        try:
+            repo.git_commit(commit_message)
+        except Exception as commit_error:
+            # A failed commit usually just means there was nothing to commit
+            if "nothing to commit" in str(commit_error).lower():
+                print("ℹ️  No changes to commit (repository is up to date)")
+            else:
+                print(f"⚠️  Warning during commit: {commit_error}")
+                raise
+        else:
+            # Push outside the commit handler so a failed push is raised as-is
+            # instead of being retried as a commit and reported as "no changes"
+            print("📤 Pushing to Hugging Face Space...")
+            repo.git_push()
+            print("✅ Deployment complete!")
+    print(f"🎉 Successfully deployed to: https://huggingface.co/spaces/{repo_id}")
+# Script entry point: run the deployment only when this file is executed directly.
+if __name__ == "__main__":
+    deploy_to_hf_space()

.github/workflows/deploy-hf-space.yml ADDED Viewed

	@@ -0,0 +1,44 @@

+name: Deploy to Hugging Face Space
+on:
+  push:
+    branches: [main]
+  workflow_dispatch:  # Allow manual triggering
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      # No write permissions needed for GitHub repo (we're pushing to HF Space)
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          # The deploy script relies on huggingface_hub.Repository, which is
+          # deprecated and dropped from the 1.x releases, so stay on 0.x.
+          pip install "huggingface-hub<1.0"
+      - name: Deploy to Hugging Face Space
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          HF_USERNAME: ${{ secrets.HF_USERNAME }}
+          HF_SPACE_NAME: ${{ secrets.HF_SPACE_NAME }}
+        run: |
+          python .github/scripts/deploy_to_hf_space.py
+      - name: Verify deployment
+        if: success()
+        # Pass the values through env instead of interpolating them into the
+        # script text, so special characters cannot alter the shell command.
+        env:
+          HF_USERNAME: ${{ secrets.HF_USERNAME }}
+          HF_SPACE_NAME: ${{ secrets.HF_SPACE_NAME }}
+        run: |
+          echo "✅ Deployment completed successfully!"
+          echo "Space URL: https://huggingface.co/spaces/${HF_USERNAME}/${HF_SPACE_NAME}"

dev/__init__.py CHANGED Viewed

	@@ -3,3 +3,4 @@
3
4
5


3
4
5
6	+

docs/LICENSE.md CHANGED Viewed

@@ -24,3 +24,4 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.


24	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25	SOFTWARE.
26
27	+

examples/README.md DELETED Viewed

@@ -1,184 +0,0 @@
-# The DETERMINATOR Examples
-**NO MOCKS. NO FAKE DATA. REAL SCIENCE.**
-These demos run the REAL deep research pipeline with actual API calls.
----
-## Prerequisites
-You MUST have API keys configured:
-```bash
-# Copy the example and add your keys
-cp .env.example .env
-# Required (pick one):
-OPENAI_API_KEY=sk-...
-ANTHROPIC_API_KEY=sk-ant-...
-# Optional (higher PubMed rate limits):
-NCBI_API_KEY=your-key
-```
----
-## Examples
-### 1. Search Demo (No LLM Required)
-Demonstrates REAL parallel search across PubMed, ClinicalTrials.gov, and Europe PMC.
-```bash
-uv run python examples/search_demo/run_search.py "metformin cancer"
-```
-**What's REAL:**
-- Actual NCBI E-utilities API calls (PubMed)
-- Actual ClinicalTrials.gov API calls
-- Actual Europe PMC API calls (includes preprints)
-- Real papers, real trials, real preprints
----
-### 2. Embeddings Demo (No LLM Required)
-Demonstrates REAL semantic search and deduplication.
-```bash
-uv run python examples/embeddings_demo/run_embeddings.py
-```
-**What's REAL:**
-- Actual sentence-transformers model (all-MiniLM-L6-v2)
-- Actual ChromaDB vector storage
-- Real cosine similarity computations
-- Real semantic deduplication
----
-### 3. Orchestrator Demo (LLM Required)
-Demonstrates the REAL search-judge-synthesize loop.
-```bash
-uv run python examples/orchestrator_demo/run_agent.py "metformin cancer"
-uv run python examples/orchestrator_demo/run_agent.py "aspirin alzheimer" --iterations 5
-```
-**What's REAL:**
-- Real PubMed + ClinicalTrials + Europe PMC searches
-- Real LLM judge evaluating evidence quality
-- Real iterative refinement based on LLM decisions
-- Real research synthesis
----
-### 4. Magentic Demo (OpenAI Required)
-Demonstrates REAL multi-agent coordination using Microsoft Agent Framework.
-```bash
-# Requires OPENAI_API_KEY specifically
-uv run python examples/orchestrator_demo/run_magentic.py "metformin cancer"
-```
-**What's REAL:**
-- Real MagenticBuilder orchestration
-- Real SearchAgent, JudgeAgent, HypothesisAgent, ReportAgent
-- Real manager-based coordination
----
-### 5. Hypothesis Demo (LLM Required)
-Demonstrates REAL mechanistic hypothesis generation.
-```bash
-uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's"
-uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure"
-```
-**What's REAL:**
-- Real PubMed + Web search first
-- Real embedding-based deduplication
-- Real LLM generating Drug -> Target -> Pathway -> Effect chains
-- Real knowledge gap identification
----
-### 6. Full-Stack Demo (LLM Required)
-**THE COMPLETE PIPELINE** - All phases working together.
-```bash
-uv run python examples/full_stack_demo/run_full.py "metformin Alzheimer's"
-uv run python examples/full_stack_demo/run_full.py "sildenafil heart failure" -i 3
-```
-**What's REAL:**
-1. Real PubMed + ClinicalTrials + Europe PMC evidence collection
-2. Real embedding-based semantic deduplication
-3. Real LLM mechanistic hypothesis generation
-4. Real LLM evidence quality assessment
-5. Real LLM structured scientific report generation
-Output: Publication-quality research report with validated citations.
----
-## API Key Requirements
-| Example | LLM Required | Keys |
-|---------|--------------|------|
-| search_demo | No | Optional: `NCBI_API_KEY` |
-| embeddings_demo | No | None |
-| orchestrator_demo | Yes | `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` |
-| run_magentic | Yes | `OPENAI_API_KEY` (Magentic requires OpenAI) |
-| hypothesis_demo | Yes | `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` |
-| full_stack_demo | Yes | `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` |
----
-## Architecture
-```text
-User Query
-    |
-    v
-[REAL Search] --> PubMed + ClinicalTrials + Europe PMC APIs
-    |
-    v
-[REAL Embeddings] --> Actual sentence-transformers
-    |
-    v
-[REAL Hypothesis] --> Actual LLM reasoning
-    |
-    v
-[REAL Judge] --> Actual LLM assessment
-    |
-    +---> Need more? --> Loop back to Search
-    |
-    +---> Sufficient --> Continue
-    |
-    v
-[REAL Report] --> Actual LLM synthesis
-    |
-    v
-Publication-Quality Research Report
-```
----
-## Why No Mocks?
-> "Authenticity is the feature."
-Mocks belong in `tests/unit/`, not in demos. When you run these examples, you see:
-- Real papers from real databases
-- Real AI reasoning about real evidence
-- Real scientific hypotheses
-- Real research reports
-This is what The DETERMINATOR actually does. No fake data. No canned responses.

examples/embeddings_demo/run_embeddings.py DELETED Viewed

@@ -1,104 +0,0 @@
-#!/usr/bin/env python3
-"""
-Demo: Semantic Search & Deduplication (Phase 6).
-This script demonstrates embedding-based capabilities using REAL data:
-- Fetches REAL abstracts from PubMed
-- Embeds text with sentence-transformers
-- Performs semantic deduplication on LIVE research data
-Usage:
-    uv run python examples/embeddings_demo/run_embeddings.py
-"""
-import asyncio
-from src.services.embeddings import EmbeddingService
-from src.tools.pubmed import PubMedTool
-def create_fresh_service(name_suffix: str = "") -> EmbeddingService:
-    """Create a fresh embedding service with unique collection name."""
-    import uuid
-    # Create service with unique collection by modifying the internal collection
-    service = EmbeddingService.__new__(EmbeddingService)
-    service._model = __import__("sentence_transformers").SentenceTransformer("all-MiniLM-L6-v2")
-    service._client = __import__("chromadb").Client()
-    collection_name = f"demo_{name_suffix}_{uuid.uuid4().hex[:8]}"
-    service._collection = service._client.create_collection(
-        name=collection_name, metadata={"hnsw:space": "cosine"}
-    )
-    return service
-async def demo_real_pipeline() -> None:
-    """Run the demo using REAL PubMed data."""
-    print("\n" + "=" * 60)
-    print("DeepCritical Embeddings Demo (REAL DATA)")
-    print("=" * 60)
-    # 1. Fetch Real Data
-    query = "metformin mechanism of action"
-    print(f"\n[1] Fetching real papers for: '{query}'...")
-    pubmed = PubMedTool()
-    # Fetch enough results to likely get some overlap/redundancy
-    evidence = await pubmed.search(query, max_results=10)
-    print(f"    Found {len(evidence)} papers.")
-    print("\n    Sample Titles:")
-    for i, e in enumerate(evidence[:3], 1):
-        print(f"    {i}. {e.citation.title[:80]}...")
-    # 2. Embed Data
-    print("\n[2] Embedding abstracts (sentence-transformers)...")
-    service = create_fresh_service("real_demo")
-    # 3. Semantic Search
-    print("\n[3] Semantic Search Demo")
-    print("    Indexing evidence...")
-    for e in evidence:
-        # Use URL as ID for uniqueness
-        await service.add_evidence(
-            evidence_id=e.citation.url,
-            content=e.content,
-            metadata={
-                "source": e.citation.source,
-                "title": e.citation.title,
-                "date": e.citation.date,
-            },
-        )
-    semantic_query = "activation of AMPK pathway"
-    print(f"    Searching for concept: '{semantic_query}'")
-    results = await service.search_similar(semantic_query, n_results=2)
-    print("    Top matches:")
-    for i, r in enumerate(results, 1):
-        similarity = 1 - r["distance"]
-        print(f"    {i}. [{similarity:.1%} match] {r['metadata']['title'][:70]}...")
-    # 4. Semantic Deduplication
-    print("\n[4] Semantic Deduplication Demo")
-    # Create a FRESH service for deduplication so we don't clash with Step 3's index
-    dedup_service = create_fresh_service("dedup_demo")
-    print("    Checking for redundant papers (threshold=0.85)...")
-    # To force a duplicate for demo purposes, let's double the evidence list
-    # simulating finding the same papers again or very similar ones
-    duplicated_evidence = evidence + evidence[:2]
-    print(f"    Input pool: {len(duplicated_evidence)} items (with artificial duplicates added)")
-    unique = await dedup_service.deduplicate(duplicated_evidence, threshold=0.85)
-    print(f"    Output pool: {len(unique)} unique items")
-    print(f"    Removed {len(duplicated_evidence) - len(unique)} duplicates.")
-    print("\n" + "=" * 60)
-    print("Demo complete! Verified with REAL PubMed data.")
-    print("=" * 60 + "\n")
-if __name__ == "__main__":
-    asyncio.run(demo_real_pipeline())

examples/full_stack_demo/run_full.py DELETED Viewed

@@ -1,236 +0,0 @@
-#!/usr/bin/env python3
-"""
-Demo: Full Stack DETERMINATOR Agent (Phases 1-8).
-This script demonstrates the COMPLETE REAL deep research pipeline:
-- Phase 2: REAL Search (PubMed + ClinicalTrials + Europe PMC)
-- Phase 6: REAL Embeddings (sentence-transformers + ChromaDB)
-- Phase 7: REAL Hypothesis (LLM mechanistic reasoning)
-- Phase 3: REAL Judge (LLM evidence assessment)
-- Phase 8: REAL Report (LLM structured scientific report)
-NO MOCKS. NO FAKE DATA. REAL SCIENCE.
-Usage:
-    uv run python examples/full_stack_demo/run_full.py "metformin Alzheimer's"
-    uv run python examples/full_stack_demo/run_full.py "sildenafil heart failure" -i 3
-Requires: OPENAI_API_KEY or ANTHROPIC_API_KEY
-"""
-import argparse
-import asyncio
-import os
-import sys
-from typing import Any
-from src.utils.models import Evidence
-def print_header(title: str) -> None:
-    """Print a formatted section header."""
-    print(f"\n{'=' * 70}")
-    print(f"  {title}")
-    print(f"{'=' * 70}\n")
-def print_step(step: int, name: str) -> None:
-    """Print a step indicator."""
-    print(f"\n[Step {step}] {name}")
-    print("-" * 50)
-_MAX_DISPLAY_LEN = 600
-def _print_truncated(text: str) -> None:
-    """Print text, truncating if too long."""
-    if len(text) > _MAX_DISPLAY_LEN:
-        print(text[:_MAX_DISPLAY_LEN] + "\n... [truncated for display]")
-    else:
-        print(text)
-async def _run_search_iteration(
-    query: str,
-    iteration: int,
-    evidence_store: dict[str, Any],
-    all_evidence: list[Evidence],
-    search_handler: Any,
-    embedding_service: Any,
-) -> list[Evidence]:
-    """Run a single search iteration with deduplication."""
-    search_queries = [query]
-    if evidence_store.get("hypotheses"):
-        for h in evidence_store["hypotheses"][-2:]:
-            search_queries.extend(h.search_suggestions[:1])
-    for q in search_queries[:2]:
-        result = await search_handler.execute(q, max_results_per_tool=5)
-        print(f"  '{q}' -> {result.total_found} results")
-        new_unique = await embedding_service.deduplicate(result.evidence)
-        print(f"  After dedup: {len(new_unique)} unique")
-        all_evidence.extend(new_unique)
-    evidence_store["current"] = all_evidence
-    evidence_store["iteration_count"] = iteration
-    return all_evidence
-async def _handle_judge_step(
-    judge_handler: Any, query: str, all_evidence: list[Evidence], evidence_store: dict[str, Any]
-) -> tuple[bool, str]:
-    """Handle the judge assessment step. Returns (should_stop, next_query)."""
-    print("\n[Judge] Assessing evidence quality (REAL LLM)...")
-    assessment = await judge_handler.assess(query, all_evidence)
-    print(f"  Mechanism Score: {assessment.details.mechanism_score}/10")
-    print(f"  Clinical Score:  {assessment.details.clinical_evidence_score}/10")
-    print(f"  Confidence:      {assessment.confidence:.0%}")
-    print(f"  Recommendation:  {assessment.recommendation.upper()}")
-    if assessment.recommendation == "synthesize":
-        print("\n[Judge] Evidence sufficient! Proceeding to report generation...")
-        evidence_store["last_assessment"] = assessment.details.model_dump()
-        return True, query
-    next_queries = assessment.next_search_queries[:2] if assessment.next_search_queries else []
-    if next_queries:
-        print(f"\n[Judge] Need more evidence. Next queries: {next_queries}")
-        return False, next_queries[0]
-    print("\n[Judge] Need more evidence but no suggested queries. Continuing with original query.")
-    return False, query
-async def run_full_demo(query: str, max_iterations: int) -> None:
-    """Run the REAL full stack pipeline."""
-    print_header("DeepCritical Full Stack Demo (REAL)")
-    print(f"Query: {query}")
-    print(f"Max iterations: {max_iterations}")
-    print("Mode: REAL (All live API calls - no mocks)\n")
-    # Import real components
-    from src.agent_factory.judges import JudgeHandler
-    from src.agents.hypothesis_agent import HypothesisAgent
-    from src.agents.report_agent import ReportAgent
-    from src.services.embeddings import EmbeddingService
-    from src.tools.clinicaltrials import ClinicalTrialsTool
-    from src.tools.europepmc import EuropePMCTool
-    from src.tools.pubmed import PubMedTool
-    from src.tools.search_handler import SearchHandler
-    # Initialize REAL services
-    print("[Init] Loading embedding model...")
-    embedding_service = EmbeddingService()
-    search_handler = SearchHandler(
-        tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()], timeout=30.0
-    )
-    judge_handler = JudgeHandler()
-    # Shared evidence store
-    evidence_store: dict[str, Any] = {"current": [], "hypotheses": [], "iteration_count": 0}
-    all_evidence: list[Evidence] = []
-    for iteration in range(1, max_iterations + 1):
-        print_step(iteration, f"ITERATION {iteration}/{max_iterations}")
-        # Step 1: REAL Search
-        print("\n[Search] Querying PubMed + ClinicalTrials + Europe PMC (REAL API calls)...")
-        all_evidence = await _run_search_iteration(
-            query, iteration, evidence_store, all_evidence, search_handler, embedding_service
-        )
-        if not all_evidence:
-            print("\nNo evidence found. Try a different query.")
-            return
-        # Step 2: REAL Hypothesis generation (first iteration only)
-        if iteration == 1:
-            print("\n[Hypothesis] Generating mechanistic hypotheses (REAL LLM)...")
-            hypothesis_agent = HypothesisAgent(evidence_store, embedding_service)
-            hyp_response = await hypothesis_agent.run(query)
-            _print_truncated(hyp_response.messages[0].text)
-        # Step 3: REAL Judge
-        should_stop, query = await _handle_judge_step(
-            judge_handler, query, all_evidence, evidence_store
-        )
-        if should_stop:
-            break
-    # Step 4: REAL Report generation
-    print_step(iteration + 1, "REPORT GENERATION (REAL LLM)")
-    report_agent = ReportAgent(evidence_store, embedding_service)
-    report_response = await report_agent.run(query)
-    print("\n" + "=" * 70)
-    print("  FINAL RESEARCH REPORT")
-    print("=" * 70)
-    print(report_response.messages[0].text)
-async def main() -> None:
-    """Entry point."""
-    parser = argparse.ArgumentParser(
-        description="DeepCritical Full Stack Demo - REAL, No Mocks",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog="""
-This demo runs the COMPLETE pipeline with REAL API calls:
-  1. REAL search: Actual PubMed queries
-  2. REAL embeddings: Actual sentence-transformers model
-  3. REAL hypothesis: Actual LLM generating mechanistic chains
-  4. REAL judge: Actual LLM assessing evidence quality
-  5. REAL report: Actual LLM generating structured report
-Examples:
-    uv run python examples/full_stack_demo/run_full.py "metformin Alzheimer's"
-    uv run python examples/full_stack_demo/run_full.py "sildenafil heart failure" -i 3
-    uv run python examples/full_stack_demo/run_full.py "aspirin cancer prevention"
-        """,
-    )
-    parser.add_argument(
-        "query",
-        help="Research query (e.g., 'metformin Alzheimer's disease')",
-    )
-    parser.add_argument(
-        "-i",
-        "--iterations",
-        type=int,
-        default=2,
-        help="Max search iterations (default: 2)",
-    )
-    args = parser.parse_args()
-    if args.iterations < 1:
-        print("Error: iterations must be at least 1")
-        sys.exit(1)
-    # Fail fast: require API key
-    if not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY")):
-        print("=" * 70)
-        print("ERROR: This demo requires a real LLM.")
-        print()
-        print("Set one of the following in your .env file:")
-        print("  OPENAI_API_KEY=sk-...")
-        print("  ANTHROPIC_API_KEY=sk-ant-...")
-        print()
-        print("This is a REAL demo. No mocks. No fake data.")
-        print("=" * 70)
-        sys.exit(1)
-    await run_full_demo(args.query, args.iterations)
-    print("\n" + "=" * 70)
-    print("  DeepCritical Full Stack Demo Complete!")
-    print("  ")
-    print("  Everything you just saw was REAL:")
-    print("    - Real PubMed + ClinicalTrials + Europe PMC searches")
-    print("    - Real embedding computations")
-    print("    - Real LLM reasoning")
-    print("    - Real scientific report")
-    print("=" * 70 + "\n")
-if __name__ == "__main__":
-    asyncio.run(main())

examples/hypothesis_demo/run_hypothesis.py DELETED Viewed

@@ -1,142 +0,0 @@
-#!/usr/bin/env python3
-"""
-Demo: Hypothesis Generation (Phase 7).
-This script demonstrates the REAL hypothesis generation pipeline:
-1. REAL search: PubMed + ClinicalTrials + Europe PMC (actual API calls)
-2. REAL embeddings: Semantic deduplication
-3. REAL LLM: Mechanistic hypothesis generation
-Usage:
-    # Requires OPENAI_API_KEY or ANTHROPIC_API_KEY
-    uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's"
-    uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure"
-"""
-import argparse
-import asyncio
-import os
-import sys
-from typing import Any
-from src.agents.hypothesis_agent import HypothesisAgent
-from src.services.embeddings import EmbeddingService
-from src.tools.clinicaltrials import ClinicalTrialsTool
-from src.tools.europepmc import EuropePMCTool
-from src.tools.pubmed import PubMedTool
-from src.tools.search_handler import SearchHandler
-async def run_hypothesis_demo(query: str) -> None:
-    """Run the REAL hypothesis generation pipeline."""
-    try:
-        print(f"\n{'=' * 60}")
-        print("DeepCritical Hypothesis Agent Demo (Phase 7)")
-        print(f"Query: {query}")
-        print("Mode: REAL (Live API calls)")
-        print(f"{'=' * 60}\n")
-        # Step 1: REAL Search
-        print("[Step 1] Searching PubMed + ClinicalTrials + Europe PMC...")
-        search_handler = SearchHandler(
-            tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()], timeout=30.0
-        )
-        result = await search_handler.execute(query, max_results_per_tool=5)
-        print(f"  Found {result.total_found} results from {result.sources_searched}")
-        if result.errors:
-            print(f"  Warnings: {result.errors}")
-        if not result.evidence:
-            print("\nNo evidence found. Try a different query.")
-            return
-        # Step 2: REAL Embeddings - Deduplicate
-        print("\n[Step 2] Semantic deduplication...")
-        embedding_service = EmbeddingService()
-        unique_evidence = await embedding_service.deduplicate(result.evidence, threshold=0.85)
-        print(f"  {len(result.evidence)} -> {len(unique_evidence)} unique papers")
-        # Show what we found
-        print("\n[Evidence collected]")
-        max_title_len = 50
-        for i, e in enumerate(unique_evidence[:5], 1):
-            raw_title = e.citation.title
-            if len(raw_title) > max_title_len:
-                title = raw_title[:max_title_len] + "..."
-            else:
-                title = raw_title
-            print(f"  {i}. [{e.citation.source.upper()}] {title}")
-        # Step 3: REAL LLM - Generate hypotheses
-        print("\n[Step 3] Generating mechanistic hypotheses (LLM)...")
-        evidence_store: dict[str, Any] = {"current": unique_evidence, "hypotheses": []}
-        agent = HypothesisAgent(evidence_store, embedding_service)
-        print("-" * 60)
-        response = await agent.run(query)
-        print(response.messages[0].text)
-        print("-" * 60)
-        # Show stored hypotheses
-        hypotheses = evidence_store.get("hypotheses", [])
-        print(f"\n{len(hypotheses)} hypotheses stored")
-        if hypotheses:
-            print("\nGenerated search queries for further investigation:")
-            for h in hypotheses:
-                queries = h.to_search_queries()
-                print(f"  {h.drug} -> {h.target}:")
-                for q in queries[:3]:
-                    print(f"    - {q}")
-    except Exception as e:
-        print(f"\n❌ Error during hypothesis generation: {e}")
-        raise
-async def main() -> None:
-    """Entry point."""
-    parser = argparse.ArgumentParser(
-        description="Hypothesis Generation Demo (REAL - No Mocks)",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog="""
-Examples:
-    uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's"
-    uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure"
-    uv run python examples/hypothesis_demo/run_hypothesis.py "aspirin cancer prevention"
-        """,
-    )
-    parser.add_argument(
-        "query",
-        nargs="?",
-        default="metformin Alzheimer's disease",
-        help="Research query",
-    )
-    args = parser.parse_args()
-    # Fail fast: require API key
-    if not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY")):
-        print("=" * 60)
-        print("ERROR: This demo requires a real LLM.")
-        print()
-        print("Set one of the following in your .env file:")
-        print("  OPENAI_API_KEY=sk-...")
-        print("  ANTHROPIC_API_KEY=sk-ant-...")
-        print()
-        print("This is a REAL demo, not a mock. No fake data.")
-        print("=" * 60)
-        sys.exit(1)
-    await run_hypothesis_demo(args.query)
-    print("\n" + "=" * 60)
-    print("Demo complete! This was a REAL pipeline:")
-    print("  1. REAL search: PubMed + ClinicalTrials + Europe PMC APIs")
-    print("  2. REAL embeddings: Actual sentence-transformers")
-    print("  3. REAL LLM: Actual hypothesis generation")
-    print("=" * 60 + "\n")
-if __name__ == "__main__":
-    asyncio.run(main())

examples/modal_demo/run_analysis.py DELETED Viewed

@@ -1,64 +0,0 @@
-#!/usr/bin/env python3
-"""Demo: Modal-powered statistical analysis.
-This script uses StatisticalAnalyzer directly (NO agent_framework dependency).
-Usage:
-    uv run python examples/modal_demo/run_analysis.py "metformin alzheimer"
-"""
-import argparse
-import asyncio
-import os
-import sys
-from src.services.statistical_analyzer import get_statistical_analyzer
-from src.tools.pubmed import PubMedTool
-from src.utils.config import settings
-async def main() -> None:
-    """Run the Modal analysis demo."""
-    parser = argparse.ArgumentParser(description="Modal Analysis Demo")
-    parser.add_argument("query", help="Research query")
-    args = parser.parse_args()
-    if not settings.modal_available:
-        print("Error: Modal credentials not configured.")
-        sys.exit(1)
-    if not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY")):
-        print("Error: No LLM API key found.")
-        sys.exit(1)
-    print(f"\n{'=' * 60}")
-    print("DeepCritical Modal Analysis Demo")
-    print(f"Query: {args.query}")
-    print(f"{'=' * 60}\n")
-    # Step 1: Gather Evidence
-    print("Step 1: Gathering evidence from PubMed...")
-    pubmed = PubMedTool()
-    evidence = await pubmed.search(args.query, max_results=5)
-    print(f"  Found {len(evidence)} papers\n")
-    # Step 2: Run Modal Analysis
-    print("Step 2: Running statistical analysis in Modal sandbox...")
-    analyzer = get_statistical_analyzer()
-    result = await analyzer.analyze(query=args.query, evidence=evidence)
-    # Step 3: Display Results
-    print("\n" + "=" * 60)
-    print("ANALYSIS RESULTS")
-    print("=" * 60)
-    print(f"\nVerdict: {result.verdict}")
-    print(f"Confidence: {result.confidence:.0%}")
-    print("\nKey Findings:")
-    for finding in result.key_findings:
-        print(f"  - {finding}")
-    print("\n[Demo Complete - Code executed in Modal, not locally]")
-if __name__ == "__main__":
-    asyncio.run(main())

examples/modal_demo/test_code_execution.py DELETED Viewed

@@ -1,169 +0,0 @@
-"""Demo script to test Modal code execution integration.
-Run with: uv run python examples/modal_demo/test_code_execution.py
-"""
-import sys
-from pathlib import Path
-# Add src to path
-sys.path.insert(0, str(Path(__file__).parent.parent.parent))
-from src.tools.code_execution import CodeExecutionError, get_code_executor
-def test_basic_execution():
-    """Test basic code execution."""
-    print("\n=== Test 1: Basic Execution ===")
-    executor = get_code_executor()
-    code = """
-print("Hello from Modal sandbox!")
-result = 2 + 2
-print(f"2 + 2 = {result}")
-"""
-    result = executor.execute(code)
-    print(f"Success: {result['success']}")
-    print(f"Stdout:\n{result['stdout']}")
-    if result["stderr"]:
-        print(f"Stderr:\n{result['stderr']}")
-def test_scientific_computing():
-    """Test scientific computing libraries."""
-    print("\n=== Test 2: Scientific Computing ===")
-    executor = get_code_executor()
-    code = """
-import pandas as pd
-import numpy as np
-# Create sample data
-data = {
-    'drug': ['DrugA', 'DrugB', 'DrugC'],
-    'efficacy': [0.75, 0.82, 0.68],
-    'sample_size': [100, 150, 120]
-}
-df = pd.DataFrame(data)
-# Calculate weighted average
-weighted_avg = np.average(df['efficacy'], weights=df['sample_size'])
-print(f"Drugs tested: {len(df)}")
-print(f"Weighted average efficacy: {weighted_avg:.3f}")
-print("\\nDataFrame:")
-print(df.to_string())
-"""
-    result = executor.execute(code)
-    print(f"Success: {result['success']}")
-    print(f"Output:\n{result['stdout']}")
-def test_statistical_analysis():
-    """Test statistical analysis."""
-    print("\n=== Test 3: Statistical Analysis ===")
-    executor = get_code_executor()
-    code = """
-import numpy as np
-from scipy import stats
-# Simulate two treatment groups
-np.random.seed(42)
-control_group = np.random.normal(100, 15, 50)
-treatment_group = np.random.normal(110, 15, 50)
-# Perform t-test
-t_stat, p_value = stats.ttest_ind(treatment_group, control_group)
-print(f"Control mean: {np.mean(control_group):.2f}")
-print(f"Treatment mean: {np.mean(treatment_group):.2f}")
-print(f"T-statistic: {t_stat:.3f}")
-print(f"P-value: {p_value:.4f}")
-if p_value < 0.05:
-    print("Result: Statistically significant difference")
-else:
-    print("Result: No significant difference")
-"""
-    result = executor.execute(code)
-    print(f"Success: {result['success']}")
-    print(f"Output:\n{result['stdout']}")
-def test_with_return_value():
-    """Test execute_with_return method."""
-    print("\n=== Test 4: Return Value ===")
-    executor = get_code_executor()
-    code = """
-import numpy as np
-# Calculate something
-data = np.array([1, 2, 3, 4, 5])
-result = {
-    'mean': float(np.mean(data)),
-    'std': float(np.std(data)),
-    'sum': int(np.sum(data))
-}
-"""
-    try:
-        result = executor.execute_with_return(code)
-        print(f"Returned result: {result}")
-        print(f"Mean: {result['mean']}")
-        print(f"Std: {result['std']}")
-        print(f"Sum: {result['sum']}")
-    except CodeExecutionError as e:
-        print(f"Error: {e}")
-def test_error_handling():
-    """Test error handling."""
-    print("\n=== Test 5: Error Handling ===")
-    executor = get_code_executor()
-    code = """
-# This will fail
-x = 1 / 0
-"""
-    result = executor.execute(code)
-    print(f"Success: {result['success']}")
-    print(f"Error: {result['error']}")
-def main():
-    """Run all tests."""
-    print("=" * 60)
-    print("Modal Code Execution Demo")
-    print("=" * 60)
-    tests = [
-        test_basic_execution,
-        test_scientific_computing,
-        test_statistical_analysis,
-        test_with_return_value,
-        test_error_handling,
-    ]
-    for test in tests:
-        try:
-            test()
-        except Exception as e:
-            print(f"\n❌ Test failed: {e}")
-            import traceback
-            traceback.print_exc()
-    print("\n" + "=" * 60)
-    print("Demo completed!")
-    print("=" * 60)
-if __name__ == "__main__":
-    main()

examples/modal_demo/verify_sandbox.py DELETED Viewed

@@ -1,101 +0,0 @@
-#!/usr/bin/env python3
-"""Verify that Modal sandbox is properly isolated.
-This script proves to judges that code runs in Modal, not locally.
-NO agent_framework dependency - uses only src.tools.code_execution.
-Usage:
-    uv run python examples/modal_demo/verify_sandbox.py
-"""
-import asyncio
-from functools import partial
-from src.tools.code_execution import CodeExecutionError, get_code_executor
-from src.utils.config import settings
-def print_result(result: dict) -> None:
-    """Print execution result, surfacing errors when they occur."""
-    if result.get("success"):
-        print(f"  {result['stdout'].strip()}\n")
-    else:
-        error = result.get("error") or result.get("stderr", "").strip() or "Unknown error"
-        print(f"  ERROR: {error}\n")
-async def main() -> None:
-    """Verify Modal sandbox isolation."""
-    if not settings.modal_available:
-        print("Error: Modal credentials not configured.")
-        print("Set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in .env")
-        return
-    try:
-        executor = get_code_executor()
-        loop = asyncio.get_running_loop()
-        print("=" * 60)
-        print("Modal Sandbox Isolation Verification")
-        print("=" * 60 + "\n")
-        # Test 1: Hostname
-        print("Test 1: Check hostname (should NOT be your machine)")
-        code1 = "import socket; print(f'Hostname: {socket.gethostname()}')"
-        result1 = await loop.run_in_executor(None, partial(executor.execute, code1))
-        print_result(result1)
-        # Test 2: Scientific libraries
-        print("Test 2: Verify scientific libraries")
-        code2 = """
-import pandas as pd
-import numpy as np
-import scipy
-print(f"pandas: {pd.__version__}")
-print(f"numpy: {np.__version__}")
-print(f"scipy: {scipy.__version__}")
-"""
-        result2 = await loop.run_in_executor(None, partial(executor.execute, code2))
-        print_result(result2)
-        # Test 3: Network blocked
-        print("Test 3: Verify network isolation")
-        code3 = """
-import urllib.request
-try:
-    urllib.request.urlopen("https://google.com", timeout=2)
-    print("Network: ALLOWED (unexpected!)")
-except Exception:
-    print("Network: BLOCKED (as expected)")
-"""
-        result3 = await loop.run_in_executor(None, partial(executor.execute, code3))
-        print_result(result3)
-        # Test 4: Real statistics
-        print("Test 4: Execute statistical analysis")
-        code4 = """
-import pandas as pd
-import scipy.stats as stats
-data = pd.DataFrame({'effect': [0.42, 0.38, 0.51]})
-mean = data['effect'].mean()
-t_stat, p_val = stats.ttest_1samp(data['effect'], 0)
-print(f"Mean Effect: {mean:.3f}")
-print(f"P-value: {p_val:.4f}")
-print(f"Verdict: {'SUPPORTED' if p_val < 0.05 else 'INCONCLUSIVE'}")
-"""
-        result4 = await loop.run_in_executor(None, partial(executor.execute, code4))
-        print_result(result4)
-        print("=" * 60)
-        print("All tests complete - Modal sandbox verified!")
-        print("=" * 60)
-    except CodeExecutionError as e:
-        print(f"Error: Modal code execution failed: {e}")
-        print("Hint: Ensure Modal SDK is installed and credentials are valid.")
-if __name__ == "__main__":
-    asyncio.run(main())

examples/orchestrator_demo/run_agent.py DELETED Viewed

@@ -1,115 +0,0 @@
-#!/usr/bin/env python3
-"""
-Demo: DeepCritical Agent Loop (Search + Judge + Orchestrator).
-This script demonstrates the REAL Phase 4 orchestration:
-- REAL Iterative Search (PubMed + ClinicalTrials + Europe PMC)
-- REAL Evidence Evaluation (LLM Judge)
-- REAL Orchestration Loop
-- REAL Final Synthesis
-NO MOCKS. REAL API CALLS.
-Usage:
-    uv run python examples/orchestrator_demo/run_agent.py "metformin cancer"
-    uv run python examples/orchestrator_demo/run_agent.py "sildenafil heart failure" --iterations 5
-Requires: OPENAI_API_KEY or ANTHROPIC_API_KEY
-"""
-import argparse
-import asyncio
-import os
-import sys
-from src.agent_factory.judges import JudgeHandler
-from src.orchestrator import Orchestrator
-from src.tools.clinicaltrials import ClinicalTrialsTool
-from src.tools.europepmc import EuropePMCTool
-from src.tools.pubmed import PubMedTool
-from src.tools.search_handler import SearchHandler
-from src.utils.models import OrchestratorConfig
-MAX_ITERATIONS = 10
-async def main() -> None:
-    """Run the REAL agent demo."""
-    parser = argparse.ArgumentParser(
-        description="DeepCritical Agent Demo - REAL, No Mocks",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog="""
-This demo runs the REAL search-judge-synthesize loop:
-  1. REAL search: PubMed + ClinicalTrials + Europe PMC queries
-  2. REAL judge: Actual LLM assessing evidence quality
-  3. REAL loop: Actual iterative refinement based on LLM decisions
-  4. REAL synthesis: Actual research summary generation
-Examples:
-    uv run python examples/orchestrator_demo/run_agent.py "metformin cancer"
-    uv run python examples/orchestrator_demo/run_agent.py "aspirin alzheimer" --iterations 5
-        """,
-    )
-    parser.add_argument("query", help="Research query (e.g., 'metformin cancer')")
-    parser.add_argument("--iterations", type=int, default=3, help="Max iterations (default: 3)")
-    args = parser.parse_args()
-    if not 1 <= args.iterations <= MAX_ITERATIONS:
-        print(f"Error: iterations must be between 1 and {MAX_ITERATIONS}")
-        sys.exit(1)
-    # Fail fast: require API key
-    if not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY")):
-        print("=" * 60)
-        print("ERROR: This demo requires a real LLM.")
-        print()
-        print("Set one of the following in your .env file:")
-        print("  OPENAI_API_KEY=sk-...")
-        print("  ANTHROPIC_API_KEY=sk-ant-...")
-        print()
-        print("This is a REAL demo. No mocks. No fake data.")
-        print("=" * 60)
-        sys.exit(1)
-    print(f"\n{'=' * 60}")
-    print("DeepCritical Agent Demo (REAL)")
-    print(f"Query: {args.query}")
-    print(f"Max Iterations: {args.iterations}")
-    print("Mode: REAL (All live API calls)")
-    print(f"{'=' * 60}\n")
-    # Setup REAL components
-    search_handler = SearchHandler(
-        tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()], timeout=30.0
-    )
-    judge_handler = JudgeHandler()  # REAL LLM judge
-    config = OrchestratorConfig(max_iterations=args.iterations)
-    orchestrator = Orchestrator(
-        search_handler=search_handler, judge_handler=judge_handler, config=config
-    )
-    # Run the REAL loop
-    try:
-        async for event in orchestrator.run(args.query):
-            # Print event with icon (remove markdown bold for CLI)
-            print(event.to_markdown().replace("**", ""))
-            # Show search results count
-            if event.type == "search_complete" and event.data:
-                print(f"   -> Found {event.data.get('new_count', 0)} new items")
-    except Exception as e:
-        print(f"\n❌ Error: {e}")
-        raise
-    print("\n" + "=" * 60)
-    print("Demo complete! Everything was REAL:")
-    print("  - Real PubMed + ClinicalTrials + Europe PMC searches")
-    print("  - Real LLM judge decisions")
-    print("  - Real iterative refinement")
-    print("=" * 60 + "\n")
-if __name__ == "__main__":
-    asyncio.run(main())

examples/orchestrator_demo/run_magentic.py DELETED Viewed

@@ -1,96 +0,0 @@
-#!/usr/bin/env python3
-"""
-Demo: Magentic-One Orchestrator for DeepCritical.
-This script demonstrates Phase 5 functionality:
-- Multi-Agent Coordination (Searcher + Judge + Manager)
-- Magentic-One Workflow
-Usage:
-    export OPENAI_API_KEY=...
-    uv run python examples/orchestrator_demo/run_magentic.py "metformin cancer"
-"""
-import argparse
-import asyncio
-import os
-import sys
-from src.agent_factory.judges import JudgeHandler
-from src.orchestrator_factory import create_orchestrator
-from src.tools.clinicaltrials import ClinicalTrialsTool
-from src.tools.europepmc import EuropePMCTool
-from src.tools.pubmed import PubMedTool
-from src.tools.search_handler import SearchHandler
-from src.utils.models import OrchestratorConfig
-async def main() -> None:
-    """Run the magentic agent demo."""
-    parser = argparse.ArgumentParser(description="Run DeepCritical Magentic Agent")
-    parser.add_argument("query", help="Research query (e.g., 'metformin cancer')")
-    parser.add_argument("--iterations", type=int, default=10, help="Max rounds")
-    args = parser.parse_args()
-    # Check for OpenAI key specifically - Magentic requires function calling
-    # which is only supported by OpenAI's API (not Anthropic or HF Inference)
-    if not os.getenv("OPENAI_API_KEY"):
-        print("Error: OPENAI_API_KEY required. Magentic uses function calling")
-        print("       which requires OpenAI's API. For other providers, use mode='simple'.")
-        sys.exit(1)
-    print(f"\n{'=' * 60}")
-    print("DeepCritical Magentic Agent Demo")
-    print(f"Query: {args.query}")
-    print("Mode: MAGENTIC (Multi-Agent)")
-    print(f"{'=' * 60}\n")
-    # 1. Setup Search Tools
-    search_handler = SearchHandler(
-        tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()], timeout=30.0
-    )
-    # 2. Setup Judge
-    judge_handler = JudgeHandler()
-    # 3. Setup Orchestrator via Factory
-    config = OrchestratorConfig(max_iterations=args.iterations)
-    orchestrator = create_orchestrator(
-        search_handler=search_handler,
-        judge_handler=judge_handler,
-        config=config,
-        mode="magentic",
-    )
-    if not orchestrator:
-        print("Failed to create Magentic orchestrator. Is agent-framework installed?")
-        sys.exit(1)
-    # 4. Run Loop
-    try:
-        async for event in orchestrator.run(args.query):
-            # Print event with icon
-            # Clean up markdown for CLI
-            msg_obj = event.message
-            msg_text = ""
-            if hasattr(msg_obj, "text"):
-                msg_text = msg_obj.text
-            else:
-                msg_text = str(msg_obj)
-            msg = msg_text.replace("\n", " ").replace("**", "")[:150]
-            print(f"[{event.type.upper()}] {msg}...")
-            if event.type == "complete":
-                print("\n--- FINAL OUTPUT ---\n")
-                print(msg_text)
-    except Exception as e:
-        print(f"\n❌ Error: {e}")
-        import traceback
-        traceback.print_exc()
-if __name__ == "__main__":
-    asyncio.run(main())

examples/rate_limiting_demo.py DELETED Viewed

@@ -1,82 +0,0 @@
-#!/usr/bin/env python3
-"""Demo script to verify rate limiting works correctly."""
-import asyncio
-import time
-from src.tools.pubmed import PubMedTool
-from src.tools.rate_limiter import RateLimiter, get_pubmed_limiter, reset_pubmed_limiter
-async def test_basic_limiter():
-    """Test basic rate limiter behavior."""
-    print("=" * 60)
-    print("Rate Limiting Demo")
-    print("=" * 60)
-    # Test 1: Basic limiter
-    print("\n[Test 1] Testing 3/second limiter...")
-    limiter = RateLimiter("3/second")
-    start = time.monotonic()
-    for i in range(6):
-        await limiter.acquire()
-        elapsed = time.monotonic() - start
-        print(f"  Request {i + 1} at {elapsed:.2f}s")
-    total = time.monotonic() - start
-    print(f"  Total time for 6 requests: {total:.2f}s (expected ~2s)")
-async def test_pubmed_limiter():
-    """Test PubMed-specific limiter."""
-    print("\n[Test 2] Testing PubMed limiter (shared)...")
-    reset_pubmed_limiter()  # Clean state
-    # Without API key: 3/sec
-    limiter = get_pubmed_limiter(api_key=None)
-    print(f"  Rate without key: {limiter.rate}")
-    # Multiple tools should share the same limiter
-    tool1 = PubMedTool()
-    tool2 = PubMedTool()
-    # Verify they share the limiter
-    print(f"  Tools share limiter: {tool1._limiter is tool2._limiter}")
-async def test_concurrent_requests():
-    """Test rate limiting under concurrent load."""
-    print("\n[Test 3] Testing concurrent request limiting...")
-    limiter = RateLimiter("5/second")
-    async def make_request(i: int):
-        await limiter.acquire()
-        return time.monotonic()
-    start = time.monotonic()
-    # Launch 10 concurrent requests
-    tasks = [make_request(i) for i in range(10)]
-    times = await asyncio.gather(*tasks)
-    # Calculate distribution
-    relative_times = [t - start for t in times]
-    print(f"  Request times: {[f'{t:.2f}s' for t in sorted(relative_times)]}")
-    total = max(relative_times)
-    print(f"  All 10 requests completed in {total:.2f}s (expected ~2s)")
-async def main():
-    await test_basic_limiter()
-    await test_pubmed_limiter()
-    await test_concurrent_requests()
-    print("\n" + "=" * 60)
-    print("Demo complete!")
-if __name__ == "__main__":
-    asyncio.run(main())

examples/search_demo/run_search.py DELETED Viewed

@@ -1,67 +0,0 @@
-#!/usr/bin/env python3
-"""
-Demo: Search for biomedical research evidence.
-This script demonstrates multi-source search functionality:
-- PubMed search (biomedical literature)
-- ClinicalTrials.gov search (clinical trial evidence)
-- SearchHandler (parallel scatter-gather orchestration)
-Usage:
-    # From project root:
-    uv run python examples/search_demo/run_search.py
-    # With custom query:
-    uv run python examples/search_demo/run_search.py "metformin cancer"
-Requirements:
-    - Optional: NCBI_API_KEY in .env for higher PubMed rate limits
-"""
-import asyncio
-import sys
-from src.tools.clinicaltrials import ClinicalTrialsTool
-from src.tools.europepmc import EuropePMCTool
-from src.tools.pubmed import PubMedTool
-from src.tools.search_handler import SearchHandler
-async def main(query: str) -> None:
-    """Run search demo with the given query."""
-    print(f"\n{'=' * 60}")
-    print("The DETERMINATOR Search Demo")
-    print(f"Query: {query}")
-    print(f"{'=' * 60}\n")
-    # Initialize tools
-    pubmed = PubMedTool()
-    trials = ClinicalTrialsTool()
-    preprints = EuropePMCTool()
-    handler = SearchHandler(tools=[pubmed, trials, preprints], timeout=30.0)
-    # Execute search
-    print("Searching PubMed, ClinicalTrials.gov, and Europe PMC in parallel...")
-    result = await handler.execute(query, max_results_per_tool=5)
-    # Display results
-    print(f"\n{'=' * 60}")
-    print(f"Results: {result.total_found} pieces of evidence")
-    print(f"Sources: {', '.join(result.sources_searched)}")
-    if result.errors:
-        print(f"Errors: {result.errors}")
-    print(f"{'=' * 60}\n")
-    for i, evidence in enumerate(result.evidence, 1):
-        print(f"[{i}] {evidence.citation.source.upper()}: {evidence.citation.title[:80]}...")
-        print(f"    URL: {evidence.citation.url}")
-        print(f"    Content: {evidence.content[:150]}...")
-        print()
-if __name__ == "__main__":
-    # Default query or use command line arg
-    default_query = "metformin Alzheimer's disease treatment mechanisms"
-    query = sys.argv[1] if len(sys.argv) > 1 else default_query
-    asyncio.run(main(query))

src/middleware/state_machine.py CHANGED Viewed

	@@ -136,3 +136,4 @@ def get_workflow_state() -> WorkflowState:
136
137
138


136
137
138
139	+

src/tools/searchxng_web_search.py CHANGED Viewed

	@@ -122,3 +122,4 @@ class SearchXNGWebSearchTool:
122
123
124


122
123
124
125	+

src/tools/serper_web_search.py CHANGED Viewed

	@@ -122,3 +122,4 @@ class SerperWebSearchTool:
122
123
124


122
123
124
125	+

src/tools/vendored/crawl_website.py CHANGED Viewed

	@@ -134,3 +134,4 @@ async def crawl_website(starting_url: str) -> list[ScrapeResult] \| str:
134
135
136


134
135
136
137	+

src/tools/vendored/searchxng_client.py CHANGED Viewed

	@@ -103,3 +103,4 @@ class SearchXNGClient:
103
104
105


103
104
105
106	+

src/tools/vendored/serper_client.py CHANGED Viewed

	@@ -99,3 +99,4 @@ class SerperClient:
99
100
101


99
100
101
102	+

src/tools/vendored/web_search_core.py CHANGED Viewed

	@@ -208,3 +208,4 @@ def is_valid_url(url: str) -> bool:
208
209
210


208
209
210
211	+

src/tools/web_search_factory.py CHANGED Viewed

	@@ -75,3 +75,4 @@ def create_web_search_tool() -> SearchTool \| None:
75
76
77


75
76
77
78	+

src/utils/markdown.css CHANGED Viewed

	@@ -13,3 +13,4 @@ body {
13
14
15


13
14
15
16	+

src/utils/md_to_pdf.py CHANGED Viewed

	@@ -73,3 +73,4 @@ def md_to_pdf(md_text: str, pdf_file_path: str) -> None:
73
74
75


73
74
75
76	+

src/utils/report_generator.py CHANGED Viewed

	@@ -176,3 +176,4 @@ def generate_report_from_evidence(
176
177
178


176
177
178
179	+

tests/unit/middleware/test_budget_tracker_phase7.py CHANGED Viewed

	@@ -159,3 +159,4 @@ class TestIterationTokenTracking:
159	assert budget2.iteration_tokens[1] == 200
160
161


159	assert budget2.iteration_tokens[1] == 200
160
161
162	+

tests/unit/middleware/test_state_machine.py CHANGED Viewed

	@@ -357,3 +357,4 @@ class TestContextVarIsolation:
357
358
359


357
358
359
360	+

tests/unit/middleware/test_workflow_manager.py CHANGED Viewed

@@ -285,3 +285,4 @@ class TestWorkflowManager:
         assert len(shared) == 1
         assert shared[0].content == "Shared"


285	assert len(shared) == 1
286	assert shared[0].content == "Shared"
287
288	+