Joseph Pollack commited on
Commit
188495c
·
1 Parent(s): dda90bf

Adds docs and CI deployment to HF Spaces

Browse files
.github/scripts/deploy_to_hf_space.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Deploy repository to Hugging Face Space, excluding unnecessary files."""
2
+
3
+ import os
4
+ import shutil
5
+ from pathlib import Path
6
+ from typing import Set
7
+
8
+ from huggingface_hub import HfApi, Repository
9
+
10
+
11
def get_excluded_dirs() -> Set[str]:
    """Return directory names that must never be copied to the Space.

    Grouped by why they are excluded; the union is the deployment blocklist.
    """
    project_dirs = {
        "docs",
        "dev",
        "folder",
        "site",
        "tests",  # Optional - can be included if desired
        "examples",  # Optional - can be included if desired
        "reference_repos",
        "burner_docs",
        "chroma_db",
        "logs",
    }
    vcs_and_editor = {".git", ".github", ".cursor"}
    tool_caches = {"__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", "htmlcov"}
    environments = {".venv", "venv", "env", "ENV", "node_modules"}
    build_artifacts = {"build", "dist", ".eggs"}
    return project_dirs | vcs_and_editor | tool_caches | environments | build_artifacts
41
+
42
+
43
def get_excluded_files() -> Set[str]:
    """Return file names (and simple ``*`` wildcard patterns) to exclude.

    Exact names and wildcard patterns share one set; ``should_exclude``
    decides how each entry is matched.
    """
    exact_names = [
        ".pre-commit-config.yaml",
        "mkdocs.yml",
        "uv.lock",
        "AGENTS.txt",
        "CONTRIBUTING.md",
        ".env",
        ".env.local",
        ".DS_Store",
        "Thumbs.db",
        ".coverage",
        "coverage.xml",
    ]
    wildcard_patterns = ["*.local", "*.log"]
    return set(exact_names) | set(wildcard_patterns)
60
+
61
+
62
def should_exclude(path: Path, excluded_dirs: Set[str], excluded_files: Set[str]) -> bool:
    """Return True if *path* must not be deployed to the Space.

    A path is excluded when:
      * any ancestor directory name appears in ``excluded_dirs``,
      * the path is itself a directory whose name is in ``excluded_dirs``, or
      * the path is an existing file whose name matches an entry of
        ``excluded_files`` (exact name, or shell-style wildcard such as
        ``*.log``).

    Args:
        path: Candidate path (checked against the real filesystem via
            ``is_dir``/``is_file``, so nonexistent paths only hit the
            ancestor-directory rule).
        excluded_dirs: Directory names to block anywhere in the tree.
        excluded_files: Exact file names and ``*`` wildcard patterns.

    Returns:
        True when the path should be skipped during deployment.
    """
    # Local import keeps the module's top-level dependencies unchanged.
    from fnmatch import fnmatch

    # Any excluded ancestor directory poisons the whole subtree.
    if any(parent.name in excluded_dirs for parent in path.parents):
        return True

    # The path itself may be an excluded directory.
    if path.is_dir() and path.name in excluded_dirs:
        return True

    if path.is_file():
        # Exact file-name match.
        if path.name in excluded_files:
            return True
        # fnmatch handles arbitrary wildcard placement (e.g. "*.log",
        # "tmp*.txt"), unlike a naive strip-the-star endswith() check which
        # breaks for patterns where "*" is not a pure prefix.
        return any(fnmatch(path.name, pattern) for pattern in excluded_files if "*" in pattern)

    return False
87
+
88
+
89
def _clone_or_create_space(api: "HfApi", repo_id: str, local_dir: str, hf_token: str) -> "Repository":
    """Clone the Space repository locally, creating it first if it does not exist."""
    try:
        repo = Repository(
            local_dir=local_dir,
            clone_from=repo_id,
            token=hf_token,
            repo_type="space",
        )
        print(f"✅ Cloned existing Space: {repo_id}")
    except Exception as e:
        print(f"⚠️ Could not clone Space (may not exist yet): {e}")
        # Pass the full "{owner}/{name}" id so the Space is created under the
        # configured user OR organization. (Passing only the bare space name
        # would always create it under the token's own user account, breaking
        # the documented organization support.)
        api.create_repo(
            repo_id=repo_id,
            repo_type="space",
            space_sdk="gradio",
            token=hf_token,
            exist_ok=True,
        )
        repo = Repository(
            local_dir=local_dir,
            clone_from=repo_id,
            token=hf_token,
            repo_type="space",
        )
        print(f"✅ Created new Space: {repo_id}")
    return repo


def _clean_local_copy(local_dir: str) -> None:
    """Remove everything in the local Space checkout except its .git directory."""
    for item in Path(local_dir).iterdir():
        if item.name == ".git":
            continue
        if item.is_dir():
            shutil.rmtree(item)
        else:
            item.unlink()


def _copy_repo_files(local_dir: str, excluded_dirs: Set[str], excluded_files: Set[str]) -> tuple[int, int]:
    """Copy deployable files from the current directory into *local_dir*.

    Returns:
        (files_copied, dirs_copied) counts for reporting.
    """
    repo_root = Path(".")
    files_copied = 0
    dirs_copied = 0

    for item in repo_root.rglob("*"):
        # Never touch anything inside a .git directory.
        if ".git" in item.parts:
            continue
        if should_exclude(item, excluded_dirs, excluded_files):
            continue
        try:
            rel_path = item.relative_to(repo_root)
        except ValueError:
            # Item is outside the repository root; skip it.
            continue
        # Redundant belt-and-braces check on the relative parts.
        if any(part in excluded_dirs for part in rel_path.parts):
            continue

        dest_path = Path(local_dir) / rel_path
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        if item.is_file():
            shutil.copy2(item, dest_path)
            files_copied += 1
        elif item.is_dir():
            # Directory itself is created by mkdir above; just count it.
            dirs_copied += 1

    return files_copied, dirs_copied


def _commit_and_push(repo: "Repository") -> None:
    """Stage, commit and push, tolerating the 'nothing to commit' case."""
    repo.git_add(auto_lfs_track=True)
    try:
        # is_repo_clean may not exist in all huggingface_hub versions.
        if hasattr(repo, "is_repo_clean") and repo.is_repo_clean():
            print("ℹ️ No changes to commit (repository is up to date)")
            return
        repo.git_commit("Deploy to Hugging Face Space [skip ci]")
        print("📤 Pushing to Hugging Face Space...")
        repo.git_push()
        print("✅ Deployment complete!")
    except Exception:
        # If the cleanliness check failed, try to commit anyway; a commit on a
        # clean tree fails gracefully with "nothing to commit".
        try:
            repo.git_commit("Deploy to Hugging Face Space [skip ci]")
            print("📤 Pushing to Hugging Face Space...")
            repo.git_push()
            print("✅ Deployment complete!")
        except Exception as commit_error:
            if "nothing to commit" in str(commit_error).lower():
                print("ℹ️ No changes to commit (repository is up to date)")
            else:
                print(f"⚠️ Warning during commit: {commit_error}")
                raise


def deploy_to_hf_space() -> None:
    """Deploy repository to Hugging Face Space.

    Supports both user and organization Spaces:
    - User Space: username/space-name
    - Organization Space: organization-name/space-name

    Works with both classic tokens and fine-grained tokens.

    Raises:
        ValueError: If HF_TOKEN, HF_USERNAME or HF_SPACE_NAME is not set.
    """
    # Configuration comes from the CI environment (see the workflow file).
    hf_token = os.getenv("HF_TOKEN")
    hf_username = os.getenv("HF_USERNAME")  # Can be username or organization name
    space_name = os.getenv("HF_SPACE_NAME")

    if not all([hf_token, hf_username, space_name]):
        raise ValueError(
            "Missing required environment variables: HF_TOKEN, HF_USERNAME, HF_SPACE_NAME"
        )

    # Format: {username|organization}/{space_name}
    repo_id = f"{hf_username}/{space_name}"
    local_dir = "hf_space"

    print(f"🚀 Deploying to Hugging Face Space: {repo_id}")

    api = HfApi(token=hf_token)
    repo = _clone_or_create_space(api, repo_id, local_dir, hf_token)

    print("🧹 Cleaning existing files...")
    _clean_local_copy(local_dir)

    print("📦 Copying files...")
    files_copied, dirs_copied = _copy_repo_files(
        local_dir, get_excluded_dirs(), get_excluded_files()
    )
    print(f"✅ Copied {files_copied} files and {dirs_copied} directories")

    print("💾 Committing changes...")
    _commit_and_push(repo)

    print(f"🎉 Successfully deployed to: https://huggingface.co/spaces/{repo_id}")
231
+
232
+
233
# Entry point when run as a standalone script (invoked by the GitHub Actions
# workflow in .github/workflows/deploy-hf-space.yml).
if __name__ == "__main__":
    deploy_to_hf_space()
235
+
.github/workflows/deploy-hf-space.yml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Deploys the repository to a Hugging Face Space on every push to main, or
# on demand via workflow_dispatch. Requires the HF_TOKEN, HF_USERNAME and
# HF_SPACE_NAME repository secrets (consumed by the deploy script).
name: Deploy to Hugging Face Space

on:
  push:
    branches: [main]
  workflow_dispatch: # Allow manual triggering

jobs:
  deploy:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      # No write permissions needed for GitHub repo (we're pushing to HF Space)

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          # Full history so the local clone behaves like a complete repo.
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install huggingface-hub

      - name: Deploy to Hugging Face Space
        env:
          # Secrets are forwarded as environment variables read by the script.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          HF_USERNAME: ${{ secrets.HF_USERNAME }}
          HF_SPACE_NAME: ${{ secrets.HF_SPACE_NAME }}
        run: |
          python .github/scripts/deploy_to_hf_space.py

      - name: Verify deployment
        if: success()
        run: |
          echo "✅ Deployment completed successfully!"
          echo "Space URL: https://huggingface.co/spaces/${{ secrets.HF_USERNAME }}/${{ secrets.HF_SPACE_NAME }}"
44
+
dev/__init__.py CHANGED
@@ -3,3 +3,4 @@
3
 
4
 
5
 
 
 
3
 
4
 
5
 
6
+
docs/LICENSE.md CHANGED
@@ -24,3 +24,4 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
  SOFTWARE.
26
 
 
 
24
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
  SOFTWARE.
26
 
27
+
examples/README.md DELETED
@@ -1,184 +0,0 @@
1
- # The DETERMINATOR Examples
2
-
3
- **NO MOCKS. NO FAKE DATA. REAL SCIENCE.**
4
-
5
- These demos run the REAL deep research pipeline with actual API calls.
6
-
7
- ---
8
-
9
- ## Prerequisites
10
-
11
- You MUST have API keys configured:
12
-
13
- ```bash
14
- # Copy the example and add your keys
15
- cp .env.example .env
16
-
17
- # Required (pick one):
18
- OPENAI_API_KEY=sk-...
19
- ANTHROPIC_API_KEY=sk-ant-...
20
-
21
- # Optional (higher PubMed rate limits):
22
- NCBI_API_KEY=your-key
23
- ```
24
-
25
- ---
26
-
27
- ## Examples
28
-
29
- ### 1. Search Demo (No LLM Required)
30
-
31
- Demonstrates REAL parallel search across PubMed, ClinicalTrials.gov, and Europe PMC.
32
-
33
- ```bash
34
- uv run python examples/search_demo/run_search.py "metformin cancer"
35
- ```
36
-
37
- **What's REAL:**
38
- - Actual NCBI E-utilities API calls (PubMed)
39
- - Actual ClinicalTrials.gov API calls
40
- - Actual Europe PMC API calls (includes preprints)
41
- - Real papers, real trials, real preprints
42
-
43
- ---
44
-
45
- ### 2. Embeddings Demo (No LLM Required)
46
-
47
- Demonstrates REAL semantic search and deduplication.
48
-
49
- ```bash
50
- uv run python examples/embeddings_demo/run_embeddings.py
51
- ```
52
-
53
- **What's REAL:**
54
- - Actual sentence-transformers model (all-MiniLM-L6-v2)
55
- - Actual ChromaDB vector storage
56
- - Real cosine similarity computations
57
- - Real semantic deduplication
58
-
59
- ---
60
-
61
- ### 3. Orchestrator Demo (LLM Required)
62
-
63
- Demonstrates the REAL search-judge-synthesize loop.
64
-
65
- ```bash
66
- uv run python examples/orchestrator_demo/run_agent.py "metformin cancer"
67
- uv run python examples/orchestrator_demo/run_agent.py "aspirin alzheimer" --iterations 5
68
- ```
69
-
70
- **What's REAL:**
71
- - Real PubMed + ClinicalTrials + Europe PMC searches
72
- - Real LLM judge evaluating evidence quality
73
- - Real iterative refinement based on LLM decisions
74
- - Real research synthesis
75
-
76
- ---
77
-
78
- ### 4. Magentic Demo (OpenAI Required)
79
-
80
- Demonstrates REAL multi-agent coordination using Microsoft Agent Framework.
81
-
82
- ```bash
83
- # Requires OPENAI_API_KEY specifically
84
- uv run python examples/orchestrator_demo/run_magentic.py "metformin cancer"
85
- ```
86
-
87
- **What's REAL:**
88
- - Real MagenticBuilder orchestration
89
- - Real SearchAgent, JudgeAgent, HypothesisAgent, ReportAgent
90
- - Real manager-based coordination
91
-
92
- ---
93
-
94
- ### 5. Hypothesis Demo (LLM Required)
95
-
96
- Demonstrates REAL mechanistic hypothesis generation.
97
-
98
- ```bash
99
- uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's"
100
- uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure"
101
- ```
102
-
103
- **What's REAL:**
104
- - Real PubMed + Web search first
105
- - Real embedding-based deduplication
106
- - Real LLM generating Drug -> Target -> Pathway -> Effect chains
107
- - Real knowledge gap identification
108
-
109
- ---
110
-
111
- ### 6. Full-Stack Demo (LLM Required)
112
-
113
- **THE COMPLETE PIPELINE** - All phases working together.
114
-
115
- ```bash
116
- uv run python examples/full_stack_demo/run_full.py "metformin Alzheimer's"
117
- uv run python examples/full_stack_demo/run_full.py "sildenafil heart failure" -i 3
118
- ```
119
-
120
- **What's REAL:**
121
- 1. Real PubMed + ClinicalTrials + Europe PMC evidence collection
122
- 2. Real embedding-based semantic deduplication
123
- 3. Real LLM mechanistic hypothesis generation
124
- 4. Real LLM evidence quality assessment
125
- 5. Real LLM structured scientific report generation
126
-
127
- Output: Publication-quality research report with validated citations.
128
-
129
- ---
130
-
131
- ## API Key Requirements
132
-
133
- | Example | LLM Required | Keys |
134
- |---------|--------------|------|
135
- | search_demo | No | Optional: `NCBI_API_KEY` |
136
- | embeddings_demo | No | None |
137
- | orchestrator_demo | Yes | `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` |
138
- | run_magentic | Yes | `OPENAI_API_KEY` (Magentic requires OpenAI) |
139
- | hypothesis_demo | Yes | `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` |
140
- | full_stack_demo | Yes | `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` |
141
-
142
- ---
143
-
144
- ## Architecture
145
-
146
- ```text
147
- User Query
148
- |
149
- v
150
- [REAL Search] --> PubMed + ClinicalTrials + Europe PMC APIs
151
- |
152
- v
153
- [REAL Embeddings] --> Actual sentence-transformers
154
- |
155
- v
156
- [REAL Hypothesis] --> Actual LLM reasoning
157
- |
158
- v
159
- [REAL Judge] --> Actual LLM assessment
160
- |
161
- +---> Need more? --> Loop back to Search
162
- |
163
- +---> Sufficient --> Continue
164
- |
165
- v
166
- [REAL Report] --> Actual LLM synthesis
167
- |
168
- v
169
- Publication-Quality Research Report
170
- ```
171
-
172
- ---
173
-
174
- ## Why No Mocks?
175
-
176
- > "Authenticity is the feature."
177
-
178
- Mocks belong in `tests/unit/`, not in demos. When you run these examples, you see:
179
- - Real papers from real databases
180
- - Real AI reasoning about real evidence
181
- - Real scientific hypotheses
182
- - Real research reports
183
-
184
- This is what The DETERMINATOR actually does. No fake data. No canned responses.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/embeddings_demo/run_embeddings.py DELETED
@@ -1,104 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Demo: Semantic Search & Deduplication (Phase 6).
4
-
5
- This script demonstrates embedding-based capabilities using REAL data:
6
- - Fetches REAL abstracts from PubMed
7
- - Embeds text with sentence-transformers
8
- - Performs semantic deduplication on LIVE research data
9
-
10
- Usage:
11
- uv run python examples/embeddings_demo/run_embeddings.py
12
- """
13
-
14
- import asyncio
15
-
16
- from src.services.embeddings import EmbeddingService
17
- from src.tools.pubmed import PubMedTool
18
-
19
-
20
- def create_fresh_service(name_suffix: str = "") -> EmbeddingService:
21
- """Create a fresh embedding service with unique collection name."""
22
- import uuid
23
-
24
- # Create service with unique collection by modifying the internal collection
25
- service = EmbeddingService.__new__(EmbeddingService)
26
- service._model = __import__("sentence_transformers").SentenceTransformer("all-MiniLM-L6-v2")
27
- service._client = __import__("chromadb").Client()
28
- collection_name = f"demo_{name_suffix}_{uuid.uuid4().hex[:8]}"
29
- service._collection = service._client.create_collection(
30
- name=collection_name, metadata={"hnsw:space": "cosine"}
31
- )
32
- return service
33
-
34
-
35
- async def demo_real_pipeline() -> None:
36
- """Run the demo using REAL PubMed data."""
37
- print("\n" + "=" * 60)
38
- print("DeepCritical Embeddings Demo (REAL DATA)")
39
- print("=" * 60)
40
-
41
- # 1. Fetch Real Data
42
- query = "metformin mechanism of action"
43
- print(f"\n[1] Fetching real papers for: '{query}'...")
44
- pubmed = PubMedTool()
45
- # Fetch enough results to likely get some overlap/redundancy
46
- evidence = await pubmed.search(query, max_results=10)
47
-
48
- print(f" Found {len(evidence)} papers.")
49
- print("\n Sample Titles:")
50
- for i, e in enumerate(evidence[:3], 1):
51
- print(f" {i}. {e.citation.title[:80]}...")
52
-
53
- # 2. Embed Data
54
- print("\n[2] Embedding abstracts (sentence-transformers)...")
55
- service = create_fresh_service("real_demo")
56
-
57
- # 3. Semantic Search
58
- print("\n[3] Semantic Search Demo")
59
- print(" Indexing evidence...")
60
- for e in evidence:
61
- # Use URL as ID for uniqueness
62
- await service.add_evidence(
63
- evidence_id=e.citation.url,
64
- content=e.content,
65
- metadata={
66
- "source": e.citation.source,
67
- "title": e.citation.title,
68
- "date": e.citation.date,
69
- },
70
- )
71
-
72
- semantic_query = "activation of AMPK pathway"
73
- print(f" Searching for concept: '{semantic_query}'")
74
- results = await service.search_similar(semantic_query, n_results=2)
75
-
76
- print(" Top matches:")
77
- for i, r in enumerate(results, 1):
78
- similarity = 1 - r["distance"]
79
- print(f" {i}. [{similarity:.1%} match] {r['metadata']['title'][:70]}...")
80
-
81
- # 4. Semantic Deduplication
82
- print("\n[4] Semantic Deduplication Demo")
83
- # Create a FRESH service for deduplication so we don't clash with Step 3's index
84
- dedup_service = create_fresh_service("dedup_demo")
85
-
86
- print(" Checking for redundant papers (threshold=0.85)...")
87
-
88
- # To force a duplicate for demo purposes, let's double the evidence list
89
- # simulating finding the same papers again or very similar ones
90
- duplicated_evidence = evidence + evidence[:2]
91
- print(f" Input pool: {len(duplicated_evidence)} items (with artificial duplicates added)")
92
-
93
- unique = await dedup_service.deduplicate(duplicated_evidence, threshold=0.85)
94
-
95
- print(f" Output pool: {len(unique)} unique items")
96
- print(f" Removed {len(duplicated_evidence) - len(unique)} duplicates.")
97
-
98
- print("\n" + "=" * 60)
99
- print("Demo complete! Verified with REAL PubMed data.")
100
- print("=" * 60 + "\n")
101
-
102
-
103
- if __name__ == "__main__":
104
- asyncio.run(demo_real_pipeline())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/full_stack_demo/run_full.py DELETED
@@ -1,236 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Demo: Full Stack DETERMINATOR Agent (Phases 1-8).
4
-
5
- This script demonstrates the COMPLETE REAL deep research pipeline:
6
- - Phase 2: REAL Search (PubMed + ClinicalTrials + Europe PMC)
7
- - Phase 6: REAL Embeddings (sentence-transformers + ChromaDB)
8
- - Phase 7: REAL Hypothesis (LLM mechanistic reasoning)
9
- - Phase 3: REAL Judge (LLM evidence assessment)
10
- - Phase 8: REAL Report (LLM structured scientific report)
11
-
12
- NO MOCKS. NO FAKE DATA. REAL SCIENCE.
13
-
14
- Usage:
15
- uv run python examples/full_stack_demo/run_full.py "metformin Alzheimer's"
16
- uv run python examples/full_stack_demo/run_full.py "sildenafil heart failure" -i 3
17
-
18
- Requires: OPENAI_API_KEY or ANTHROPIC_API_KEY
19
- """
20
-
21
- import argparse
22
- import asyncio
23
- import os
24
- import sys
25
- from typing import Any
26
-
27
- from src.utils.models import Evidence
28
-
29
-
30
- def print_header(title: str) -> None:
31
- """Print a formatted section header."""
32
- print(f"\n{'=' * 70}")
33
- print(f" {title}")
34
- print(f"{'=' * 70}\n")
35
-
36
-
37
- def print_step(step: int, name: str) -> None:
38
- """Print a step indicator."""
39
- print(f"\n[Step {step}] {name}")
40
- print("-" * 50)
41
-
42
-
43
- _MAX_DISPLAY_LEN = 600
44
-
45
-
46
- def _print_truncated(text: str) -> None:
47
- """Print text, truncating if too long."""
48
- if len(text) > _MAX_DISPLAY_LEN:
49
- print(text[:_MAX_DISPLAY_LEN] + "\n... [truncated for display]")
50
- else:
51
- print(text)
52
-
53
-
54
- async def _run_search_iteration(
55
- query: str,
56
- iteration: int,
57
- evidence_store: dict[str, Any],
58
- all_evidence: list[Evidence],
59
- search_handler: Any,
60
- embedding_service: Any,
61
- ) -> list[Evidence]:
62
- """Run a single search iteration with deduplication."""
63
- search_queries = [query]
64
- if evidence_store.get("hypotheses"):
65
- for h in evidence_store["hypotheses"][-2:]:
66
- search_queries.extend(h.search_suggestions[:1])
67
-
68
- for q in search_queries[:2]:
69
- result = await search_handler.execute(q, max_results_per_tool=5)
70
- print(f" '{q}' -> {result.total_found} results")
71
- new_unique = await embedding_service.deduplicate(result.evidence)
72
- print(f" After dedup: {len(new_unique)} unique")
73
- all_evidence.extend(new_unique)
74
-
75
- evidence_store["current"] = all_evidence
76
- evidence_store["iteration_count"] = iteration
77
- return all_evidence
78
-
79
-
80
- async def _handle_judge_step(
81
- judge_handler: Any, query: str, all_evidence: list[Evidence], evidence_store: dict[str, Any]
82
- ) -> tuple[bool, str]:
83
- """Handle the judge assessment step. Returns (should_stop, next_query)."""
84
- print("\n[Judge] Assessing evidence quality (REAL LLM)...")
85
- assessment = await judge_handler.assess(query, all_evidence)
86
- print(f" Mechanism Score: {assessment.details.mechanism_score}/10")
87
- print(f" Clinical Score: {assessment.details.clinical_evidence_score}/10")
88
- print(f" Confidence: {assessment.confidence:.0%}")
89
- print(f" Recommendation: {assessment.recommendation.upper()}")
90
-
91
- if assessment.recommendation == "synthesize":
92
- print("\n[Judge] Evidence sufficient! Proceeding to report generation...")
93
- evidence_store["last_assessment"] = assessment.details.model_dump()
94
- return True, query
95
-
96
- next_queries = assessment.next_search_queries[:2] if assessment.next_search_queries else []
97
- if next_queries:
98
- print(f"\n[Judge] Need more evidence. Next queries: {next_queries}")
99
- return False, next_queries[0]
100
-
101
- print("\n[Judge] Need more evidence but no suggested queries. Continuing with original query.")
102
- return False, query
103
-
104
-
105
- async def run_full_demo(query: str, max_iterations: int) -> None:
106
- """Run the REAL full stack pipeline."""
107
- print_header("DeepCritical Full Stack Demo (REAL)")
108
- print(f"Query: {query}")
109
- print(f"Max iterations: {max_iterations}")
110
- print("Mode: REAL (All live API calls - no mocks)\n")
111
-
112
- # Import real components
113
- from src.agent_factory.judges import JudgeHandler
114
- from src.agents.hypothesis_agent import HypothesisAgent
115
- from src.agents.report_agent import ReportAgent
116
- from src.services.embeddings import EmbeddingService
117
- from src.tools.clinicaltrials import ClinicalTrialsTool
118
- from src.tools.europepmc import EuropePMCTool
119
- from src.tools.pubmed import PubMedTool
120
- from src.tools.search_handler import SearchHandler
121
-
122
- # Initialize REAL services
123
- print("[Init] Loading embedding model...")
124
- embedding_service = EmbeddingService()
125
- search_handler = SearchHandler(
126
- tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()], timeout=30.0
127
- )
128
- judge_handler = JudgeHandler()
129
-
130
- # Shared evidence store
131
- evidence_store: dict[str, Any] = {"current": [], "hypotheses": [], "iteration_count": 0}
132
- all_evidence: list[Evidence] = []
133
-
134
- for iteration in range(1, max_iterations + 1):
135
- print_step(iteration, f"ITERATION {iteration}/{max_iterations}")
136
-
137
- # Step 1: REAL Search
138
- print("\n[Search] Querying PubMed + ClinicalTrials + Europe PMC (REAL API calls)...")
139
- all_evidence = await _run_search_iteration(
140
- query, iteration, evidence_store, all_evidence, search_handler, embedding_service
141
- )
142
-
143
- if not all_evidence:
144
- print("\nNo evidence found. Try a different query.")
145
- return
146
-
147
- # Step 2: REAL Hypothesis generation (first iteration only)
148
- if iteration == 1:
149
- print("\n[Hypothesis] Generating mechanistic hypotheses (REAL LLM)...")
150
- hypothesis_agent = HypothesisAgent(evidence_store, embedding_service)
151
- hyp_response = await hypothesis_agent.run(query)
152
- _print_truncated(hyp_response.messages[0].text)
153
-
154
- # Step 3: REAL Judge
155
- should_stop, query = await _handle_judge_step(
156
- judge_handler, query, all_evidence, evidence_store
157
- )
158
- if should_stop:
159
- break
160
-
161
- # Step 4: REAL Report generation
162
- print_step(iteration + 1, "REPORT GENERATION (REAL LLM)")
163
- report_agent = ReportAgent(evidence_store, embedding_service)
164
- report_response = await report_agent.run(query)
165
-
166
- print("\n" + "=" * 70)
167
- print(" FINAL RESEARCH REPORT")
168
- print("=" * 70)
169
- print(report_response.messages[0].text)
170
-
171
-
172
- async def main() -> None:
173
- """Entry point."""
174
- parser = argparse.ArgumentParser(
175
- description="DeepCritical Full Stack Demo - REAL, No Mocks",
176
- formatter_class=argparse.RawDescriptionHelpFormatter,
177
- epilog="""
178
- This demo runs the COMPLETE pipeline with REAL API calls:
179
- 1. REAL search: Actual PubMed queries
180
- 2. REAL embeddings: Actual sentence-transformers model
181
- 3. REAL hypothesis: Actual LLM generating mechanistic chains
182
- 4. REAL judge: Actual LLM assessing evidence quality
183
- 5. REAL report: Actual LLM generating structured report
184
-
185
- Examples:
186
- uv run python examples/full_stack_demo/run_full.py "metformin Alzheimer's"
187
- uv run python examples/full_stack_demo/run_full.py "sildenafil heart failure" -i 3
188
- uv run python examples/full_stack_demo/run_full.py "aspirin cancer prevention"
189
- """,
190
- )
191
- parser.add_argument(
192
- "query",
193
- help="Research query (e.g., 'metformin Alzheimer's disease')",
194
- )
195
- parser.add_argument(
196
- "-i",
197
- "--iterations",
198
- type=int,
199
- default=2,
200
- help="Max search iterations (default: 2)",
201
- )
202
-
203
- args = parser.parse_args()
204
-
205
- if args.iterations < 1:
206
- print("Error: iterations must be at least 1")
207
- sys.exit(1)
208
-
209
- # Fail fast: require API key
210
- if not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY")):
211
- print("=" * 70)
212
- print("ERROR: This demo requires a real LLM.")
213
- print()
214
- print("Set one of the following in your .env file:")
215
- print(" OPENAI_API_KEY=sk-...")
216
- print(" ANTHROPIC_API_KEY=sk-ant-...")
217
- print()
218
- print("This is a REAL demo. No mocks. No fake data.")
219
- print("=" * 70)
220
- sys.exit(1)
221
-
222
- await run_full_demo(args.query, args.iterations)
223
-
224
- print("\n" + "=" * 70)
225
- print(" DeepCritical Full Stack Demo Complete!")
226
- print(" ")
227
- print(" Everything you just saw was REAL:")
228
- print(" - Real PubMed + ClinicalTrials + Europe PMC searches")
229
- print(" - Real embedding computations")
230
- print(" - Real LLM reasoning")
231
- print(" - Real scientific report")
232
- print("=" * 70 + "\n")
233
-
234
-
235
- if __name__ == "__main__":
236
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/hypothesis_demo/run_hypothesis.py DELETED
@@ -1,142 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Demo: Hypothesis Generation (Phase 7).
4
-
5
- This script demonstrates the REAL hypothesis generation pipeline:
6
- 1. REAL search: PubMed + ClinicalTrials + Europe PMC (actual API calls)
7
- 2. REAL embeddings: Semantic deduplication
8
- 3. REAL LLM: Mechanistic hypothesis generation
9
-
10
- Usage:
11
- # Requires OPENAI_API_KEY or ANTHROPIC_API_KEY
12
- uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's"
13
- uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure"
14
- """
15
-
16
- import argparse
17
- import asyncio
18
- import os
19
- import sys
20
- from typing import Any
21
-
22
- from src.agents.hypothesis_agent import HypothesisAgent
23
- from src.services.embeddings import EmbeddingService
24
- from src.tools.clinicaltrials import ClinicalTrialsTool
25
- from src.tools.europepmc import EuropePMCTool
26
- from src.tools.pubmed import PubMedTool
27
- from src.tools.search_handler import SearchHandler
28
-
29
-
30
- async def run_hypothesis_demo(query: str) -> None:
31
- """Run the REAL hypothesis generation pipeline."""
32
- try:
33
- print(f"\n{'=' * 60}")
34
- print("DeepCritical Hypothesis Agent Demo (Phase 7)")
35
- print(f"Query: {query}")
36
- print("Mode: REAL (Live API calls)")
37
- print(f"{'=' * 60}\n")
38
-
39
- # Step 1: REAL Search
40
- print("[Step 1] Searching PubMed + ClinicalTrials + Europe PMC...")
41
- search_handler = SearchHandler(
42
- tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()], timeout=30.0
43
- )
44
- result = await search_handler.execute(query, max_results_per_tool=5)
45
-
46
- print(f" Found {result.total_found} results from {result.sources_searched}")
47
- if result.errors:
48
- print(f" Warnings: {result.errors}")
49
-
50
- if not result.evidence:
51
- print("\nNo evidence found. Try a different query.")
52
- return
53
-
54
- # Step 2: REAL Embeddings - Deduplicate
55
- print("\n[Step 2] Semantic deduplication...")
56
- embedding_service = EmbeddingService()
57
- unique_evidence = await embedding_service.deduplicate(result.evidence, threshold=0.85)
58
- print(f" {len(result.evidence)} -> {len(unique_evidence)} unique papers")
59
-
60
- # Show what we found
61
- print("\n[Evidence collected]")
62
- max_title_len = 50
63
- for i, e in enumerate(unique_evidence[:5], 1):
64
- raw_title = e.citation.title
65
- if len(raw_title) > max_title_len:
66
- title = raw_title[:max_title_len] + "..."
67
- else:
68
- title = raw_title
69
- print(f" {i}. [{e.citation.source.upper()}] {title}")
70
-
71
- # Step 3: REAL LLM - Generate hypotheses
72
- print("\n[Step 3] Generating mechanistic hypotheses (LLM)...")
73
- evidence_store: dict[str, Any] = {"current": unique_evidence, "hypotheses": []}
74
- agent = HypothesisAgent(evidence_store, embedding_service)
75
-
76
- print("-" * 60)
77
- response = await agent.run(query)
78
- print(response.messages[0].text)
79
- print("-" * 60)
80
-
81
- # Show stored hypotheses
82
- hypotheses = evidence_store.get("hypotheses", [])
83
- print(f"\n{len(hypotheses)} hypotheses stored")
84
-
85
- if hypotheses:
86
- print("\nGenerated search queries for further investigation:")
87
- for h in hypotheses:
88
- queries = h.to_search_queries()
89
- print(f" {h.drug} -> {h.target}:")
90
- for q in queries[:3]:
91
- print(f" - {q}")
92
-
93
- except Exception as e:
94
- print(f"\n❌ Error during hypothesis generation: {e}")
95
- raise
96
-
97
-
98
- async def main() -> None:
99
- """Entry point."""
100
- parser = argparse.ArgumentParser(
101
- description="Hypothesis Generation Demo (REAL - No Mocks)",
102
- formatter_class=argparse.RawDescriptionHelpFormatter,
103
- epilog="""
104
- Examples:
105
- uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's"
106
- uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure"
107
- uv run python examples/hypothesis_demo/run_hypothesis.py "aspirin cancer prevention"
108
- """,
109
- )
110
- parser.add_argument(
111
- "query",
112
- nargs="?",
113
- default="metformin Alzheimer's disease",
114
- help="Research query",
115
- )
116
- args = parser.parse_args()
117
-
118
- # Fail fast: require API key
119
- if not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY")):
120
- print("=" * 60)
121
- print("ERROR: This demo requires a real LLM.")
122
- print()
123
- print("Set one of the following in your .env file:")
124
- print(" OPENAI_API_KEY=sk-...")
125
- print(" ANTHROPIC_API_KEY=sk-ant-...")
126
- print()
127
- print("This is a REAL demo, not a mock. No fake data.")
128
- print("=" * 60)
129
- sys.exit(1)
130
-
131
- await run_hypothesis_demo(args.query)
132
-
133
- print("\n" + "=" * 60)
134
- print("Demo complete! This was a REAL pipeline:")
135
- print(" 1. REAL search: PubMed + ClinicalTrials + Europe PMC APIs")
136
- print(" 2. REAL embeddings: Actual sentence-transformers")
137
- print(" 3. REAL LLM: Actual hypothesis generation")
138
- print("=" * 60 + "\n")
139
-
140
-
141
- if __name__ == "__main__":
142
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/modal_demo/run_analysis.py DELETED
@@ -1,64 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Demo: Modal-powered statistical analysis.
3
-
4
- This script uses StatisticalAnalyzer directly (NO agent_framework dependency).
5
-
6
- Usage:
7
- uv run python examples/modal_demo/run_analysis.py "metformin alzheimer"
8
- """
9
-
10
- import argparse
11
- import asyncio
12
- import os
13
- import sys
14
-
15
- from src.services.statistical_analyzer import get_statistical_analyzer
16
- from src.tools.pubmed import PubMedTool
17
- from src.utils.config import settings
18
-
19
-
20
- async def main() -> None:
21
- """Run the Modal analysis demo."""
22
- parser = argparse.ArgumentParser(description="Modal Analysis Demo")
23
- parser.add_argument("query", help="Research query")
24
- args = parser.parse_args()
25
-
26
- if not settings.modal_available:
27
- print("Error: Modal credentials not configured.")
28
- sys.exit(1)
29
-
30
- if not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY")):
31
- print("Error: No LLM API key found.")
32
- sys.exit(1)
33
-
34
- print(f"\n{'=' * 60}")
35
- print("DeepCritical Modal Analysis Demo")
36
- print(f"Query: {args.query}")
37
- print(f"{'=' * 60}\n")
38
-
39
- # Step 1: Gather Evidence
40
- print("Step 1: Gathering evidence from PubMed...")
41
- pubmed = PubMedTool()
42
- evidence = await pubmed.search(args.query, max_results=5)
43
- print(f" Found {len(evidence)} papers\n")
44
-
45
- # Step 2: Run Modal Analysis
46
- print("Step 2: Running statistical analysis in Modal sandbox...")
47
- analyzer = get_statistical_analyzer()
48
- result = await analyzer.analyze(query=args.query, evidence=evidence)
49
-
50
- # Step 3: Display Results
51
- print("\n" + "=" * 60)
52
- print("ANALYSIS RESULTS")
53
- print("=" * 60)
54
- print(f"\nVerdict: {result.verdict}")
55
- print(f"Confidence: {result.confidence:.0%}")
56
- print("\nKey Findings:")
57
- for finding in result.key_findings:
58
- print(f" - {finding}")
59
-
60
- print("\n[Demo Complete - Code executed in Modal, not locally]")
61
-
62
-
63
- if __name__ == "__main__":
64
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/modal_demo/test_code_execution.py DELETED
@@ -1,169 +0,0 @@
1
- """Demo script to test Modal code execution integration.
2
-
3
- Run with: uv run python examples/modal_demo/test_code_execution.py
4
- """
5
-
6
- import sys
7
- from pathlib import Path
8
-
9
- # Add src to path
10
- sys.path.insert(0, str(Path(__file__).parent.parent.parent))
11
-
12
- from src.tools.code_execution import CodeExecutionError, get_code_executor
13
-
14
-
15
- def test_basic_execution():
16
- """Test basic code execution."""
17
- print("\n=== Test 1: Basic Execution ===")
18
- executor = get_code_executor()
19
-
20
- code = """
21
- print("Hello from Modal sandbox!")
22
- result = 2 + 2
23
- print(f"2 + 2 = {result}")
24
- """
25
-
26
- result = executor.execute(code)
27
- print(f"Success: {result['success']}")
28
- print(f"Stdout:\n{result['stdout']}")
29
- if result["stderr"]:
30
- print(f"Stderr:\n{result['stderr']}")
31
-
32
-
33
- def test_scientific_computing():
34
- """Test scientific computing libraries."""
35
- print("\n=== Test 2: Scientific Computing ===")
36
- executor = get_code_executor()
37
-
38
- code = """
39
- import pandas as pd
40
- import numpy as np
41
-
42
- # Create sample data
43
- data = {
44
- 'drug': ['DrugA', 'DrugB', 'DrugC'],
45
- 'efficacy': [0.75, 0.82, 0.68],
46
- 'sample_size': [100, 150, 120]
47
- }
48
-
49
- df = pd.DataFrame(data)
50
-
51
- # Calculate weighted average
52
- weighted_avg = np.average(df['efficacy'], weights=df['sample_size'])
53
-
54
- print(f"Drugs tested: {len(df)}")
55
- print(f"Weighted average efficacy: {weighted_avg:.3f}")
56
- print("\\nDataFrame:")
57
- print(df.to_string())
58
- """
59
-
60
- result = executor.execute(code)
61
- print(f"Success: {result['success']}")
62
- print(f"Output:\n{result['stdout']}")
63
-
64
-
65
- def test_statistical_analysis():
66
- """Test statistical analysis."""
67
- print("\n=== Test 3: Statistical Analysis ===")
68
- executor = get_code_executor()
69
-
70
- code = """
71
- import numpy as np
72
- from scipy import stats
73
-
74
- # Simulate two treatment groups
75
- np.random.seed(42)
76
- control_group = np.random.normal(100, 15, 50)
77
- treatment_group = np.random.normal(110, 15, 50)
78
-
79
- # Perform t-test
80
- t_stat, p_value = stats.ttest_ind(treatment_group, control_group)
81
-
82
- print(f"Control mean: {np.mean(control_group):.2f}")
83
- print(f"Treatment mean: {np.mean(treatment_group):.2f}")
84
- print(f"T-statistic: {t_stat:.3f}")
85
- print(f"P-value: {p_value:.4f}")
86
-
87
- if p_value < 0.05:
88
- print("Result: Statistically significant difference")
89
- else:
90
- print("Result: No significant difference")
91
- """
92
-
93
- result = executor.execute(code)
94
- print(f"Success: {result['success']}")
95
- print(f"Output:\n{result['stdout']}")
96
-
97
-
98
- def test_with_return_value():
99
- """Test execute_with_return method."""
100
- print("\n=== Test 4: Return Value ===")
101
- executor = get_code_executor()
102
-
103
- code = """
104
- import numpy as np
105
-
106
- # Calculate something
107
- data = np.array([1, 2, 3, 4, 5])
108
- result = {
109
- 'mean': float(np.mean(data)),
110
- 'std': float(np.std(data)),
111
- 'sum': int(np.sum(data))
112
- }
113
- """
114
-
115
- try:
116
- result = executor.execute_with_return(code)
117
- print(f"Returned result: {result}")
118
- print(f"Mean: {result['mean']}")
119
- print(f"Std: {result['std']}")
120
- print(f"Sum: {result['sum']}")
121
- except CodeExecutionError as e:
122
- print(f"Error: {e}")
123
-
124
-
125
- def test_error_handling():
126
- """Test error handling."""
127
- print("\n=== Test 5: Error Handling ===")
128
- executor = get_code_executor()
129
-
130
- code = """
131
- # This will fail
132
- x = 1 / 0
133
- """
134
-
135
- result = executor.execute(code)
136
- print(f"Success: {result['success']}")
137
- print(f"Error: {result['error']}")
138
-
139
-
140
- def main():
141
- """Run all tests."""
142
- print("=" * 60)
143
- print("Modal Code Execution Demo")
144
- print("=" * 60)
145
-
146
- tests = [
147
- test_basic_execution,
148
- test_scientific_computing,
149
- test_statistical_analysis,
150
- test_with_return_value,
151
- test_error_handling,
152
- ]
153
-
154
- for test in tests:
155
- try:
156
- test()
157
- except Exception as e:
158
- print(f"\n❌ Test failed: {e}")
159
- import traceback
160
-
161
- traceback.print_exc()
162
-
163
- print("\n" + "=" * 60)
164
- print("Demo completed!")
165
- print("=" * 60)
166
-
167
-
168
- if __name__ == "__main__":
169
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/modal_demo/verify_sandbox.py DELETED
@@ -1,101 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Verify that Modal sandbox is properly isolated.
3
-
4
- This script proves to judges that code runs in Modal, not locally.
5
- NO agent_framework dependency - uses only src.tools.code_execution.
6
-
7
- Usage:
8
- uv run python examples/modal_demo/verify_sandbox.py
9
- """
10
-
11
- import asyncio
12
- from functools import partial
13
-
14
- from src.tools.code_execution import CodeExecutionError, get_code_executor
15
- from src.utils.config import settings
16
-
17
-
18
- def print_result(result: dict) -> None:
19
- """Print execution result, surfacing errors when they occur."""
20
- if result.get("success"):
21
- print(f" {result['stdout'].strip()}\n")
22
- else:
23
- error = result.get("error") or result.get("stderr", "").strip() or "Unknown error"
24
- print(f" ERROR: {error}\n")
25
-
26
-
27
- async def main() -> None:
28
- """Verify Modal sandbox isolation."""
29
- if not settings.modal_available:
30
- print("Error: Modal credentials not configured.")
31
- print("Set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in .env")
32
- return
33
-
34
- try:
35
- executor = get_code_executor()
36
- loop = asyncio.get_running_loop()
37
-
38
- print("=" * 60)
39
- print("Modal Sandbox Isolation Verification")
40
- print("=" * 60 + "\n")
41
-
42
- # Test 1: Hostname
43
- print("Test 1: Check hostname (should NOT be your machine)")
44
- code1 = "import socket; print(f'Hostname: {socket.gethostname()}')"
45
- result1 = await loop.run_in_executor(None, partial(executor.execute, code1))
46
- print_result(result1)
47
-
48
- # Test 2: Scientific libraries
49
- print("Test 2: Verify scientific libraries")
50
- code2 = """
51
- import pandas as pd
52
- import numpy as np
53
- import scipy
54
- print(f"pandas: {pd.__version__}")
55
- print(f"numpy: {np.__version__}")
56
- print(f"scipy: {scipy.__version__}")
57
- """
58
- result2 = await loop.run_in_executor(None, partial(executor.execute, code2))
59
- print_result(result2)
60
-
61
- # Test 3: Network blocked
62
- print("Test 3: Verify network isolation")
63
- code3 = """
64
- import urllib.request
65
- try:
66
- urllib.request.urlopen("https://google.com", timeout=2)
67
- print("Network: ALLOWED (unexpected!)")
68
- except Exception:
69
- print("Network: BLOCKED (as expected)")
70
- """
71
- result3 = await loop.run_in_executor(None, partial(executor.execute, code3))
72
- print_result(result3)
73
-
74
- # Test 4: Real statistics
75
- print("Test 4: Execute statistical analysis")
76
- code4 = """
77
- import pandas as pd
78
- import scipy.stats as stats
79
-
80
- data = pd.DataFrame({'effect': [0.42, 0.38, 0.51]})
81
- mean = data['effect'].mean()
82
- t_stat, p_val = stats.ttest_1samp(data['effect'], 0)
83
-
84
- print(f"Mean Effect: {mean:.3f}")
85
- print(f"P-value: {p_val:.4f}")
86
- print(f"Verdict: {'SUPPORTED' if p_val < 0.05 else 'INCONCLUSIVE'}")
87
- """
88
- result4 = await loop.run_in_executor(None, partial(executor.execute, code4))
89
- print_result(result4)
90
-
91
- print("=" * 60)
92
- print("All tests complete - Modal sandbox verified!")
93
- print("=" * 60)
94
-
95
- except CodeExecutionError as e:
96
- print(f"Error: Modal code execution failed: {e}")
97
- print("Hint: Ensure Modal SDK is installed and credentials are valid.")
98
-
99
-
100
- if __name__ == "__main__":
101
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/orchestrator_demo/run_agent.py DELETED
@@ -1,115 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Demo: DeepCritical Agent Loop (Search + Judge + Orchestrator).
4
-
5
- This script demonstrates the REAL Phase 4 orchestration:
6
- - REAL Iterative Search (PubMed + ClinicalTrials + Europe PMC)
7
- - REAL Evidence Evaluation (LLM Judge)
8
- - REAL Orchestration Loop
9
- - REAL Final Synthesis
10
-
11
- NO MOCKS. REAL API CALLS.
12
-
13
- Usage:
14
- uv run python examples/orchestrator_demo/run_agent.py "metformin cancer"
15
- uv run python examples/orchestrator_demo/run_agent.py "sildenafil heart failure" --iterations 5
16
-
17
- Requires: OPENAI_API_KEY or ANTHROPIC_API_KEY
18
- """
19
-
20
- import argparse
21
- import asyncio
22
- import os
23
- import sys
24
-
25
- from src.agent_factory.judges import JudgeHandler
26
- from src.orchestrator import Orchestrator
27
- from src.tools.clinicaltrials import ClinicalTrialsTool
28
- from src.tools.europepmc import EuropePMCTool
29
- from src.tools.pubmed import PubMedTool
30
- from src.tools.search_handler import SearchHandler
31
- from src.utils.models import OrchestratorConfig
32
-
33
- MAX_ITERATIONS = 10
34
-
35
-
36
- async def main() -> None:
37
- """Run the REAL agent demo."""
38
- parser = argparse.ArgumentParser(
39
- description="DeepCritical Agent Demo - REAL, No Mocks",
40
- formatter_class=argparse.RawDescriptionHelpFormatter,
41
- epilog="""
42
- This demo runs the REAL search-judge-synthesize loop:
43
- 1. REAL search: PubMed + ClinicalTrials + Europe PMC queries
44
- 2. REAL judge: Actual LLM assessing evidence quality
45
- 3. REAL loop: Actual iterative refinement based on LLM decisions
46
- 4. REAL synthesis: Actual research summary generation
47
-
48
- Examples:
49
- uv run python examples/orchestrator_demo/run_agent.py "metformin cancer"
50
- uv run python examples/orchestrator_demo/run_agent.py "aspirin alzheimer" --iterations 5
51
- """,
52
- )
53
- parser.add_argument("query", help="Research query (e.g., 'metformin cancer')")
54
- parser.add_argument("--iterations", type=int, default=3, help="Max iterations (default: 3)")
55
- args = parser.parse_args()
56
-
57
- if not 1 <= args.iterations <= MAX_ITERATIONS:
58
- print(f"Error: iterations must be between 1 and {MAX_ITERATIONS}")
59
- sys.exit(1)
60
-
61
- # Fail fast: require API key
62
- if not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY")):
63
- print("=" * 60)
64
- print("ERROR: This demo requires a real LLM.")
65
- print()
66
- print("Set one of the following in your .env file:")
67
- print(" OPENAI_API_KEY=sk-...")
68
- print(" ANTHROPIC_API_KEY=sk-ant-...")
69
- print()
70
- print("This is a REAL demo. No mocks. No fake data.")
71
- print("=" * 60)
72
- sys.exit(1)
73
-
74
- print(f"\n{'=' * 60}")
75
- print("DeepCritical Agent Demo (REAL)")
76
- print(f"Query: {args.query}")
77
- print(f"Max Iterations: {args.iterations}")
78
- print("Mode: REAL (All live API calls)")
79
- print(f"{'=' * 60}\n")
80
-
81
- # Setup REAL components
82
- search_handler = SearchHandler(
83
- tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()], timeout=30.0
84
- )
85
- judge_handler = JudgeHandler() # REAL LLM judge
86
-
87
- config = OrchestratorConfig(max_iterations=args.iterations)
88
- orchestrator = Orchestrator(
89
- search_handler=search_handler, judge_handler=judge_handler, config=config
90
- )
91
-
92
- # Run the REAL loop
93
- try:
94
- async for event in orchestrator.run(args.query):
95
- # Print event with icon (remove markdown bold for CLI)
96
- print(event.to_markdown().replace("**", ""))
97
-
98
- # Show search results count
99
- if event.type == "search_complete" and event.data:
100
- print(f" -> Found {event.data.get('new_count', 0)} new items")
101
-
102
- except Exception as e:
103
- print(f"\n❌ Error: {e}")
104
- raise
105
-
106
- print("\n" + "=" * 60)
107
- print("Demo complete! Everything was REAL:")
108
- print(" - Real PubMed + ClinicalTrials + Europe PMC searches")
109
- print(" - Real LLM judge decisions")
110
- print(" - Real iterative refinement")
111
- print("=" * 60 + "\n")
112
-
113
-
114
- if __name__ == "__main__":
115
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/orchestrator_demo/run_magentic.py DELETED
@@ -1,96 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Demo: Magentic-One Orchestrator for DeepCritical.
4
-
5
- This script demonstrates Phase 5 functionality:
6
- - Multi-Agent Coordination (Searcher + Judge + Manager)
7
- - Magentic-One Workflow
8
-
9
- Usage:
10
- export OPENAI_API_KEY=...
11
- uv run python examples/orchestrator_demo/run_magentic.py "metformin cancer"
12
- """
13
-
14
- import argparse
15
- import asyncio
16
- import os
17
- import sys
18
-
19
- from src.agent_factory.judges import JudgeHandler
20
- from src.orchestrator_factory import create_orchestrator
21
- from src.tools.clinicaltrials import ClinicalTrialsTool
22
- from src.tools.europepmc import EuropePMCTool
23
- from src.tools.pubmed import PubMedTool
24
- from src.tools.search_handler import SearchHandler
25
- from src.utils.models import OrchestratorConfig
26
-
27
-
28
- async def main() -> None:
29
- """Run the magentic agent demo."""
30
- parser = argparse.ArgumentParser(description="Run DeepCritical Magentic Agent")
31
- parser.add_argument("query", help="Research query (e.g., 'metformin cancer')")
32
- parser.add_argument("--iterations", type=int, default=10, help="Max rounds")
33
- args = parser.parse_args()
34
-
35
- # Check for OpenAI key specifically - Magentic requires function calling
36
- # which is only supported by OpenAI's API (not Anthropic or HF Inference)
37
- if not os.getenv("OPENAI_API_KEY"):
38
- print("Error: OPENAI_API_KEY required. Magentic uses function calling")
39
- print(" which requires OpenAI's API. For other providers, use mode='simple'.")
40
- sys.exit(1)
41
-
42
- print(f"\n{'=' * 60}")
43
- print("DeepCritical Magentic Agent Demo")
44
- print(f"Query: {args.query}")
45
- print("Mode: MAGENTIC (Multi-Agent)")
46
- print(f"{'=' * 60}\n")
47
-
48
- # 1. Setup Search Tools
49
- search_handler = SearchHandler(
50
- tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()], timeout=30.0
51
- )
52
-
53
- # 2. Setup Judge
54
- judge_handler = JudgeHandler()
55
-
56
- # 3. Setup Orchestrator via Factory
57
- config = OrchestratorConfig(max_iterations=args.iterations)
58
- orchestrator = create_orchestrator(
59
- search_handler=search_handler,
60
- judge_handler=judge_handler,
61
- config=config,
62
- mode="magentic",
63
- )
64
-
65
- if not orchestrator:
66
- print("Failed to create Magentic orchestrator. Is agent-framework installed?")
67
- sys.exit(1)
68
-
69
- # 4. Run Loop
70
- try:
71
- async for event in orchestrator.run(args.query):
72
- # Print event with icon
73
- # Clean up markdown for CLI
74
- msg_obj = event.message
75
- msg_text = ""
76
- if hasattr(msg_obj, "text"):
77
- msg_text = msg_obj.text
78
- else:
79
- msg_text = str(msg_obj)
80
-
81
- msg = msg_text.replace("\n", " ").replace("**", "")[:150]
82
- print(f"[{event.type.upper()}] {msg}...")
83
-
84
- if event.type == "complete":
85
- print("\n--- FINAL OUTPUT ---\n")
86
- print(msg_text)
87
-
88
- except Exception as e:
89
- print(f"\n❌ Error: {e}")
90
- import traceback
91
-
92
- traceback.print_exc()
93
-
94
-
95
- if __name__ == "__main__":
96
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/rate_limiting_demo.py DELETED
@@ -1,82 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Demo script to verify rate limiting works correctly."""
3
-
4
- import asyncio
5
- import time
6
-
7
- from src.tools.pubmed import PubMedTool
8
- from src.tools.rate_limiter import RateLimiter, get_pubmed_limiter, reset_pubmed_limiter
9
-
10
-
11
- async def test_basic_limiter():
12
- """Test basic rate limiter behavior."""
13
- print("=" * 60)
14
- print("Rate Limiting Demo")
15
- print("=" * 60)
16
-
17
- # Test 1: Basic limiter
18
- print("\n[Test 1] Testing 3/second limiter...")
19
- limiter = RateLimiter("3/second")
20
-
21
- start = time.monotonic()
22
- for i in range(6):
23
- await limiter.acquire()
24
- elapsed = time.monotonic() - start
25
- print(f" Request {i + 1} at {elapsed:.2f}s")
26
-
27
- total = time.monotonic() - start
28
- print(f" Total time for 6 requests: {total:.2f}s (expected ~2s)")
29
-
30
-
31
- async def test_pubmed_limiter():
32
- """Test PubMed-specific limiter."""
33
- print("\n[Test 2] Testing PubMed limiter (shared)...")
34
-
35
- reset_pubmed_limiter() # Clean state
36
-
37
- # Without API key: 3/sec
38
- limiter = get_pubmed_limiter(api_key=None)
39
- print(f" Rate without key: {limiter.rate}")
40
-
41
- # Multiple tools should share the same limiter
42
- tool1 = PubMedTool()
43
- tool2 = PubMedTool()
44
-
45
- # Verify they share the limiter
46
- print(f" Tools share limiter: {tool1._limiter is tool2._limiter}")
47
-
48
-
49
- async def test_concurrent_requests():
50
- """Test rate limiting under concurrent load."""
51
- print("\n[Test 3] Testing concurrent request limiting...")
52
-
53
- limiter = RateLimiter("5/second")
54
-
55
- async def make_request(i: int):
56
- await limiter.acquire()
57
- return time.monotonic()
58
-
59
- start = time.monotonic()
60
- # Launch 10 concurrent requests
61
- tasks = [make_request(i) for i in range(10)]
62
- times = await asyncio.gather(*tasks)
63
-
64
- # Calculate distribution
65
- relative_times = [t - start for t in times]
66
- print(f" Request times: {[f'{t:.2f}s' for t in sorted(relative_times)]}")
67
-
68
- total = max(relative_times)
69
- print(f" All 10 requests completed in {total:.2f}s (expected ~2s)")
70
-
71
-
72
- async def main():
73
- await test_basic_limiter()
74
- await test_pubmed_limiter()
75
- await test_concurrent_requests()
76
-
77
- print("\n" + "=" * 60)
78
- print("Demo complete!")
79
-
80
-
81
- if __name__ == "__main__":
82
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/search_demo/run_search.py DELETED
@@ -1,67 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Demo: Search for biomedical research evidence.
4
-
5
- This script demonstrates multi-source search functionality:
6
- - PubMed search (biomedical literature)
7
- - ClinicalTrials.gov search (clinical trial evidence)
8
- - SearchHandler (parallel scatter-gather orchestration)
9
-
10
- Usage:
11
- # From project root:
12
- uv run python examples/search_demo/run_search.py
13
-
14
- # With custom query:
15
- uv run python examples/search_demo/run_search.py "metformin cancer"
16
-
17
- Requirements:
18
- - Optional: NCBI_API_KEY in .env for higher PubMed rate limits
19
- """
20
-
21
- import asyncio
22
- import sys
23
-
24
- from src.tools.clinicaltrials import ClinicalTrialsTool
25
- from src.tools.europepmc import EuropePMCTool
26
- from src.tools.pubmed import PubMedTool
27
- from src.tools.search_handler import SearchHandler
28
-
29
-
30
- async def main(query: str) -> None:
31
- """Run search demo with the given query."""
32
- print(f"\n{'=' * 60}")
33
- print("The DETERMINATOR Search Demo")
34
- print(f"Query: {query}")
35
- print(f"{'=' * 60}\n")
36
-
37
- # Initialize tools
38
- pubmed = PubMedTool()
39
- trials = ClinicalTrialsTool()
40
- preprints = EuropePMCTool()
41
- handler = SearchHandler(tools=[pubmed, trials, preprints], timeout=30.0)
42
-
43
- # Execute search
44
- print("Searching PubMed, ClinicalTrials.gov, and Europe PMC in parallel...")
45
- result = await handler.execute(query, max_results_per_tool=5)
46
-
47
- # Display results
48
- print(f"\n{'=' * 60}")
49
- print(f"Results: {result.total_found} pieces of evidence")
50
- print(f"Sources: {', '.join(result.sources_searched)}")
51
- if result.errors:
52
- print(f"Errors: {result.errors}")
53
- print(f"{'=' * 60}\n")
54
-
55
- for i, evidence in enumerate(result.evidence, 1):
56
- print(f"[{i}] {evidence.citation.source.upper()}: {evidence.citation.title[:80]}...")
57
- print(f" URL: {evidence.citation.url}")
58
- print(f" Content: {evidence.content[:150]}...")
59
- print()
60
-
61
-
62
- if __name__ == "__main__":
63
- # Default query or use command line arg
64
- default_query = "metformin Alzheimer's disease treatment mechanisms"
65
- query = sys.argv[1] if len(sys.argv) > 1 else default_query
66
-
67
- asyncio.run(main(query))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/middleware/state_machine.py CHANGED
@@ -136,3 +136,4 @@ def get_workflow_state() -> WorkflowState:
136
 
137
 
138
 
 
 
136
 
137
 
138
 
139
+
src/tools/searchxng_web_search.py CHANGED
@@ -122,3 +122,4 @@ class SearchXNGWebSearchTool:
122
 
123
 
124
 
 
 
122
 
123
 
124
 
125
+
src/tools/serper_web_search.py CHANGED
@@ -122,3 +122,4 @@ class SerperWebSearchTool:
122
 
123
 
124
 
 
 
122
 
123
 
124
 
125
+
src/tools/vendored/crawl_website.py CHANGED
@@ -134,3 +134,4 @@ async def crawl_website(starting_url: str) -> list[ScrapeResult] | str:
134
 
135
 
136
 
 
 
134
 
135
 
136
 
137
+
src/tools/vendored/searchxng_client.py CHANGED
@@ -103,3 +103,4 @@ class SearchXNGClient:
103
 
104
 
105
 
 
 
103
 
104
 
105
 
106
+
src/tools/vendored/serper_client.py CHANGED
@@ -99,3 +99,4 @@ class SerperClient:
99
 
100
 
101
 
 
 
99
 
100
 
101
 
102
+
src/tools/vendored/web_search_core.py CHANGED
@@ -208,3 +208,4 @@ def is_valid_url(url: str) -> bool:
208
 
209
 
210
 
 
 
208
 
209
 
210
 
211
+
src/tools/web_search_factory.py CHANGED
@@ -75,3 +75,4 @@ def create_web_search_tool() -> SearchTool | None:
75
 
76
 
77
 
 
 
75
 
76
 
77
 
78
+
src/utils/markdown.css CHANGED
@@ -13,3 +13,4 @@ body {
13
 
14
 
15
 
 
 
13
 
14
 
15
 
16
+
src/utils/md_to_pdf.py CHANGED
@@ -73,3 +73,4 @@ def md_to_pdf(md_text: str, pdf_file_path: str) -> None:
73
 
74
 
75
 
 
 
73
 
74
 
75
 
76
+
src/utils/report_generator.py CHANGED
@@ -176,3 +176,4 @@ def generate_report_from_evidence(
176
 
177
 
178
 
 
 
176
 
177
 
178
 
179
+
tests/unit/middleware/test_budget_tracker_phase7.py CHANGED
@@ -159,3 +159,4 @@ class TestIterationTokenTracking:
159
  assert budget2.iteration_tokens[1] == 200
160
 
161
 
 
 
159
  assert budget2.iteration_tokens[1] == 200
160
 
161
 
162
+
tests/unit/middleware/test_state_machine.py CHANGED
@@ -357,3 +357,4 @@ class TestContextVarIsolation:
357
 
358
 
359
 
 
 
357
 
358
 
359
 
360
+
tests/unit/middleware/test_workflow_manager.py CHANGED
@@ -285,3 +285,4 @@ class TestWorkflowManager:
285
  assert len(shared) == 1
286
  assert shared[0].content == "Shared"
287
 
 
 
285
  assert len(shared) == 1
286
  assert shared[0].content == "Shared"
287
 
288
+