Zach Wentz committed on
Commit
fe49f51
·
1 Parent(s): 50a50c2

🤖 Deploy openspiel_env environment - 2025-10-19 22:32:50

Browse files
Files changed (50) hide show
  1. .gitattributes +0 -35
  2. Dockerfile +43 -0
  3. README.md +43 -5
  4. src/core/__init__.py +19 -0
  5. src/core/__pycache__/__init__.cpython-311.pyc +0 -0
  6. src/core/__pycache__/__init__.cpython-313.pyc +0 -0
  7. src/core/__pycache__/http_env_client.cpython-311.pyc +0 -0
  8. src/core/__pycache__/types.cpython-311.pyc +0 -0
  9. src/core/containers/__init__.py +7 -0
  10. src/core/containers/__pycache__/__init__.cpython-311.pyc +0 -0
  11. src/core/containers/images/Dockerfile +46 -0
  12. src/core/containers/images/README.md +92 -0
  13. src/core/containers/runtime/__init__.py +15 -0
  14. src/core/containers/runtime/__pycache__/__init__.cpython-311.pyc +0 -0
  15. src/core/containers/runtime/__pycache__/providers.cpython-311.pyc +0 -0
  16. src/core/containers/runtime/providers.py +289 -0
  17. src/core/containers/test_local_docker_provider.py +258 -0
  18. src/core/env_server/__init__.py +35 -0
  19. src/core/env_server/__pycache__/__init__.cpython-311.pyc +0 -0
  20. src/core/env_server/__pycache__/__init__.cpython-313.pyc +0 -0
  21. src/core/env_server/__pycache__/base_transforms.cpython-311.pyc +0 -0
  22. src/core/env_server/__pycache__/base_transforms.cpython-313.pyc +0 -0
  23. src/core/env_server/__pycache__/http_server.cpython-311.pyc +0 -0
  24. src/core/env_server/__pycache__/http_server.cpython-313.pyc +0 -0
  25. src/core/env_server/__pycache__/interfaces.cpython-311.pyc +0 -0
  26. src/core/env_server/__pycache__/interfaces.cpython-313.pyc +0 -0
  27. src/core/env_server/__pycache__/types.cpython-311.pyc +0 -0
  28. src/core/env_server/__pycache__/types.cpython-313.pyc +0 -0
  29. src/core/env_server/__pycache__/web_interface.cpython-311.pyc +0 -0
  30. src/core/env_server/base_transforms.py +29 -0
  31. src/core/env_server/http_server.py +231 -0
  32. src/core/env_server/interfaces.py +118 -0
  33. src/core/env_server/types.py +45 -0
  34. src/core/env_server/web_interface.py +764 -0
  35. src/core/http_env_client.py +175 -0
  36. src/core/tools/__init__.py +11 -0
  37. src/core/tools/local_python_executor.py +105 -0
  38. src/core/types.py +22 -0
  39. src/envs/openspiel_env/README.md +335 -0
  40. src/envs/openspiel_env/__init__.py +26 -0
  41. src/envs/openspiel_env/client.py +114 -0
  42. src/envs/openspiel_env/docker_issue.md +1 -0
  43. src/envs/openspiel_env/models.py +76 -0
  44. src/envs/openspiel_env/server/Dockerfile +85 -0
  45. src/envs/openspiel_env/server/__init__.py +7 -0
  46. src/envs/openspiel_env/server/app.py +55 -0
  47. src/envs/openspiel_env/server/build_docker.sh +69 -0
  48. src/envs/openspiel_env/server/openspiel_environment.py +266 -0
  49. src/envs/openspiel_env/server/opponent_policies.py +90 -0
  50. src/envs/openspiel_env/test_docker_all_games.sh +152 -0
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # Multi-stage build: First stage builds the base image
8
+ FROM python:3.11-slim as base-builder
9
+
10
+ # Install system dependencies
11
+ RUN apt-get update && apt-get install -y --no-install-recommends \
12
+ curl \
13
+ && rm -rf /var/lib/apt/lists/*
14
+
15
+ # Install Python dependencies that all environments need
16
+ RUN pip install --no-cache-dir \
17
+ fastapi>=0.104.0 \
18
+ "uvicorn[standard]>=0.24.0" \
19
+ requests>=2.25.0 \
20
+ wsproto>=1.0.0
21
+
22
+ # Set working directory
23
+ WORKDIR /app
24
+
25
+ # Default environment variables
26
+ ENV PYTHONPATH=/app/src
27
+ ENV PYTHONUNBUFFERED=1
28
+
29
+ # Second stage: Use the built base image and add environment-specific dependencies
30
+ FROM base-builder
31
+
32
+
33
+ # Copy only what's needed for this environment
34
+ COPY src/core/ /app/src/core/
35
+ COPY src/envs/openspiel_env/ /app/src/envs/openspiel_env/
36
+
37
+ # Health check
38
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
39
+ CMD curl -f http://localhost:8000/health || exit 1
40
+
41
+ # Run the FastAPI server
42
+ CMD ["uvicorn", "envs.openspiel_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
43
+ ENV ENABLE_WEB_INTERFACE=true
README.md CHANGED
@@ -1,10 +1,48 @@
1
  ---
2
- title: Openspiel Env
3
- emoji: 🐨
4
- colorFrom: pink
5
- colorTo: yellow
6
  sdk: docker
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Openspiel_env Environment Server
3
+ emoji: 🐳
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: docker
7
  pinned: false
8
+ app_port: 8000
9
+ base_path: /web
10
  ---
11
 
12
+ # Openspiel_env Environment Server
13
+
14
+ FastAPI server for openspiel_env environment powered by Meta's OpenEnv.
15
+
16
+ ## About
17
+
18
+ This Space provides a containerized environment for openspiel_env interactions.
19
+ Built with FastAPI and OpenEnv framework.
20
+
21
+ ## Web Interface
22
+
23
+ This deployment includes an interactive web interface for exploring the environment:
24
+ - **HumanAgent Interface**: Interact with the environment using a web form
25
+ - **State Observer**: Real-time view of environment state and action history
26
+ - **Live Updates**: WebSocket-based real-time updates
27
+
28
+ Access the web interface at: `/web`
29
+
30
+ ## OpenSpiel Environment
31
+
32
+ Provides access to OpenSpiel games for multi-agent reinforcement learning.
33
+
34
+ ### Usage
35
+ Send a POST request to `/step` with:
36
+ ```json
37
+ {
38
+ "action": 0
39
+ }
40
+ ```
41
+
42
+ ## API Documentation
43
+
44
+ Visit `/docs` for interactive API documentation.
45
+
46
+ ## Health Check
47
+
48
+ The environment provides a health check endpoint at `/health`.
src/core/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Core components for agentic environments."""
8
+
9
+ # Re-export main components from submodules for convenience
10
+ from .env_server import *
11
+ from .http_env_client import HTTPEnvClient
12
+ from .types import StepResult
13
+
14
+ # Note: MCP module doesn't export anything yet
15
+
16
+ __all__ = [
17
+ "HTTPEnvClient",
18
+ "StepResult",
19
+ ]
src/core/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (400 Bytes). View file
 
src/core/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (383 Bytes). View file
 
src/core/__pycache__/http_env_client.cpython-311.pyc ADDED
Binary file (7.68 kB). View file
 
src/core/__pycache__/types.cpython-311.pyc ADDED
Binary file (1.09 kB). View file
 
src/core/containers/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Container management for environment servers."""
src/core/containers/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (206 Bytes). View file
 
src/core/containers/images/Dockerfile ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ #
8
+ # OpenEnv Base Image
9
+ #
10
+ # This is the standard base image for all OpenEnv environment servers.
11
+ # It includes the minimal dependencies needed to run HTTP environment servers.
12
+ #
13
+ # Build: docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
14
+ # Tag: docker tag openenv-base:latest openenv-base:0.1.0
15
+ #
16
+
17
+ FROM python:3.11-slim
18
+
19
+ # Set metadata
20
+ LABEL maintainer="OpenEnv Team"
21
+ LABEL description="Base image for OpenEnv based environment servers"
22
+ LABEL version="0.1.0"
23
+
24
+ # Install system dependencies
25
+ RUN apt-get update && apt-get install -y --no-install-recommends \
26
+ curl \
27
+ && rm -rf /var/lib/apt/lists/*
28
+
29
+ # Install Python dependencies that all environments need
30
+ RUN pip install --no-cache-dir \
31
+ fastapi>=0.104.0 \
32
+ "uvicorn[standard]>=0.24.0" \
33
+ requests>=2.25.0 \
34
+ wsproto>=1.0.0
35
+
36
+ # Set working directory
37
+ WORKDIR /app
38
+
39
+ # Default environment variables
40
+ ENV PYTHONPATH=/app/src
41
+ ENV PYTHONUNBUFFERED=1
42
+
43
+ # Default expose port (can be overridden)
44
+ EXPOSE 8000
45
+
46
+ # Note: CMD should be specified in child Dockerfiles
src/core/containers/images/README.md ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OpenEnv Base Image
2
+
3
+ Standard base image for all OpenEnv environment servers.
4
+
5
+ ## What's Included
6
+
7
+ | Layer | Size | Contents |
8
+ |-------|------|----------|
9
+ | python:3.11-slim | 200 MB | Base Python runtime |
10
+ | + Dependencies | 100 MB | FastAPI, uvicorn, requests |
11
+ | **Total** | **~300 MB** | Ready for environment servers |
12
+
13
+ ## Image Sizes
14
+
15
+ ```
16
+ openenv-base:latest 300 MB (python + fastapi + uvicorn)
17
+ ```
18
+ echo-env:latest 500 MB (python + fastapi + uvicorn + app)
19
+ coding-env:latest 520 MB (python + fastapi + uvicorn + app + tools)
20
+ another-env:latest 510 MB (python + fastapi + uvicorn + app)
21
+ ---
22
+ Total: 1.5 GB (with lots of duplication)
23
+ ```
24
+
25
+ ### With Base Images (✅ Solution)
26
+ ```
27
+ openenv-base:latest 300 MB (python + fastapi + uvicorn)
28
+ echo-env:latest 50 MB (app only, uses base)
29
+ coding-env:latest 70 MB (app + tools, uses base)
30
+ another-env:latest 45 MB (app only, uses base)
31
+ ---
32
+ Total: 465 MB (base shared, minimal duplication)
33
+ ```
34
+
35
+ ## Building the Base Image
36
+
37
+ ```bash
38
+ # From project root
39
+ docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
40
+ ```
41
+
42
+ ## Usage in Environment Dockerfiles
43
+
44
+ Each environment Dockerfile should start with:
45
+
46
+ ```dockerfile
47
+ FROM openenv-base:latest
48
+
49
+ # Copy only environment-specific files
50
+ COPY src/core/ /app/src/core/
51
+ COPY src/envs/my_env/ /app/src/envs/my_env/
52
+
53
+ # Run the server
54
+ CMD ["uvicorn", "envs.my_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
55
+ ```
56
+
57
+ ## Base Image Contents
58
+
59
+ - Python 3.11-slim
60
+ - FastAPI >= 0.104.0
61
+ - Uvicorn >= 0.24.0
62
+ - Requests >= 2.25.0
63
+ - curl (for health checks)
64
+
65
+ ## Example: Building Echo Environment
66
+
67
+ ```bash
68
+ # Step 1: Build base image (do this once)
69
+ docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
70
+
71
+ # Step 2: Build echo environment (uses base)
72
+ docker build -t echo-env:latest -f src/envs/echo_env/server/Dockerfile .
73
+
74
+ # Step 3: Run echo environment
75
+ docker run -p 8000:8000 echo-env:latest
76
+ ```
77
+
78
+ ## Updating the Base
79
+
80
+ When dependencies need updating:
81
+
82
+ 1. Update `src/core/containers/images/Dockerfile`
83
+ 2. Rebuild base image
84
+ 3. Rebuild all environment images (they'll use new base)
85
+
86
+ ```bash
87
+ # Update base
88
+ docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
89
+
90
+ # Rebuild environments (they automatically use new base)
91
+ docker build -t echo-env:latest -f src/envs/echo_env/server/Dockerfile .
92
+ ```
src/core/containers/runtime/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Container runtime providers."""
8
+
9
+ from .providers import ContainerProvider, KubernetesProvider, LocalDockerProvider
10
+
11
+ __all__ = [
12
+ "ContainerProvider",
13
+ "LocalDockerProvider",
14
+ "KubernetesProvider",
15
+ ]
src/core/containers/runtime/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (389 Bytes). View file
 
src/core/containers/runtime/__pycache__/providers.cpython-311.pyc ADDED
Binary file (10.9 kB). View file
 
src/core/containers/runtime/providers.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Container provider abstractions for running environment servers.
9
+
10
+ This module provides a pluggable architecture for different container providers
11
+ (local Docker, Kubernetes, cloud providers, etc.) to be used with HTTPEnvClient.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from abc import ABC, abstractmethod
17
+ from typing import Any, Dict, Optional
18
+
19
+
20
+ class ContainerProvider(ABC):
21
+ """
22
+ Abstract base class for container providers.
23
+
24
+ Providers implement this interface to support different container platforms:
25
+ - LocalDockerProvider: Runs containers on local Docker daemon
26
+ - KubernetesProvider: Runs containers in Kubernetes cluster
27
+ - FargateProvider: Runs containers on AWS Fargate
28
+ - CloudRunProvider: Runs containers on Google Cloud Run
29
+
30
+ The provider manages a single container lifecycle and provides the base URL
31
+ for connecting to it.
32
+
33
+ Example:
34
+ >>> provider = LocalDockerProvider()
35
+ >>> base_url = provider.start_container("echo-env:latest")
36
+ >>> print(base_url) # http://localhost:8000
37
+ >>> # Use the environment via base_url
38
+ >>> provider.stop_container()
39
+ """
40
+
41
+ @abstractmethod
42
+ def start_container(
43
+ self,
44
+ image: str,
45
+ port: Optional[int] = None,
46
+ env_vars: Optional[Dict[str, str]] = None,
47
+ **kwargs: Any,
48
+ ) -> str:
49
+ """
50
+ Start a container from the specified image.
51
+
52
+ Args:
53
+ image: Container image name (e.g., "echo-env:latest")
54
+ port: Port to expose (if None, provider chooses)
55
+ env_vars: Environment variables to pass to container
56
+ **kwargs: Provider-specific options
57
+
58
+ Returns:
59
+ Base URL to connect to the container (e.g., "http://localhost:8000")
60
+
61
+ Raises:
62
+ RuntimeError: If container fails to start
63
+ """
64
+ pass
65
+
66
+ @abstractmethod
67
+ def stop_container(self) -> None:
68
+ """
69
+ Stop and remove the running container.
70
+
71
+ This cleans up the container that was started by start_container().
72
+ """
73
+ pass
74
+
75
+ @abstractmethod
76
+ def wait_for_ready(self, base_url: str, timeout_s: float = 30.0) -> None:
77
+ """
78
+ Wait for the container to be ready to accept requests.
79
+
80
+ This typically polls the /health endpoint until it returns 200.
81
+
82
+ Args:
83
+ base_url: Base URL of the container
84
+ timeout_s: Maximum time to wait
85
+
86
+ Raises:
87
+ TimeoutError: If container doesn't become ready in time
88
+ """
89
+ pass
90
+
91
+
92
+ class LocalDockerProvider(ContainerProvider):
93
+ """
94
+ Container provider for local Docker daemon.
95
+
96
+ This provider runs containers on the local machine using Docker.
97
+ Useful for development and testing.
98
+
99
+ Example:
100
+ >>> provider = LocalDockerProvider()
101
+ >>> base_url = provider.start_container("echo-env:latest")
102
+ >>> # Container running on http://localhost:<random-port>
103
+ >>> provider.stop_container()
104
+ """
105
+
106
+ def __init__(self):
107
+ """Initialize the local Docker provider."""
108
+ self._container_id: Optional[str] = None
109
+ self._container_name: Optional[str] = None
110
+
111
+ # Check if Docker is available
112
+ import subprocess
113
+
114
+ try:
115
+ subprocess.run(
116
+ ["docker", "version"],
117
+ check=True,
118
+ capture_output=True,
119
+ timeout=5,
120
+ )
121
+ except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
122
+ raise RuntimeError(
123
+ "Docker is not available. Please install Docker Desktop or Docker Engine."
124
+ )
125
+
126
+ def start_container(
127
+ self,
128
+ image: str,
129
+ port: Optional[int] = None,
130
+ env_vars: Optional[Dict[str, str]] = None,
131
+ **kwargs: Any,
132
+ ) -> str:
133
+ """
134
+ Start a Docker container locally.
135
+
136
+ Args:
137
+ image: Docker image name
138
+ port: Port to expose (if None, finds available port)
139
+ env_vars: Environment variables for the container
140
+ **kwargs: Additional Docker run options
141
+
142
+ Returns:
143
+ Base URL to connect to the container
144
+ """
145
+ import subprocess
146
+ import time
147
+
148
+ # Find available port if not specified
149
+ if port is None:
150
+ port = self._find_available_port()
151
+
152
+ # Generate container name
153
+ self._container_name = self._generate_container_name(image)
154
+
155
+ # Build docker run command
156
+ cmd = [
157
+ "docker", "run",
158
+ "-d", # Detached
159
+ "--name", self._container_name,
160
+ "-p", f"{port}:8000", # Map port
161
+ ]
162
+
163
+ # Add environment variables
164
+ if env_vars:
165
+ for key, value in env_vars.items():
166
+ cmd.extend(["-e", f"{key}={value}"])
167
+
168
+ # Add image
169
+ cmd.append(image)
170
+
171
+ # Run container
172
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True)
173
+ self._container_id = result.stdout.strip()
174
+
175
+ # Wait a moment for container to start
176
+ time.sleep(1)
177
+
178
+ base_url = f"http://localhost:{port}"
179
+ return base_url
180
+
181
+ def stop_container(self) -> None:
182
+ """
183
+ Stop and remove the Docker container.
184
+ """
185
+ if self._container_id is None:
186
+ return
187
+
188
+ import subprocess
189
+
190
+ try:
191
+ # Stop container
192
+ subprocess.run(
193
+ ["docker", "stop", self._container_id],
194
+ capture_output=True,
195
+ check=True,
196
+ timeout=10,
197
+ )
198
+
199
+ # Remove container
200
+ subprocess.run(
201
+ ["docker", "rm", self._container_id],
202
+ capture_output=True,
203
+ check=True,
204
+ timeout=10,
205
+ )
206
+ except subprocess.CalledProcessError:
207
+ # Container might already be stopped/removed
208
+ pass
209
+ finally:
210
+ self._container_id = None
211
+ self._container_name = None
212
+
213
+ def wait_for_ready(self, base_url: str, timeout_s: float = 30.0) -> None:
214
+ """
215
+ Wait for container to be ready by polling /health endpoint.
216
+
217
+ Args:
218
+ base_url: Base URL of the container
219
+ timeout_s: Maximum time to wait
220
+
221
+ Raises:
222
+ TimeoutError: If container doesn't become ready
223
+ """
224
+ import time
225
+ import requests
226
+
227
+ start_time = time.time()
228
+ health_url = f"{base_url}/health"
229
+
230
+ while time.time() - start_time < timeout_s:
231
+ try:
232
+ response = requests.get(health_url, timeout=2.0)
233
+ if response.status_code == 200:
234
+ return
235
+ except requests.RequestException:
236
+ pass
237
+
238
+ time.sleep(0.5)
239
+
240
+ raise TimeoutError(
241
+ f"Container at {base_url} did not become ready within {timeout_s}s"
242
+ )
243
+
244
+ def _find_available_port(self) -> int:
245
+ """
246
+ Find an available port on localhost.
247
+
248
+ Returns:
249
+ An available port number
250
+ """
251
+ import socket
252
+
253
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
254
+ s.bind(("", 0))
255
+ s.listen(1)
256
+ port = s.getsockname()[1]
257
+ return port
258
+
259
+ def _generate_container_name(self, image: str) -> str:
260
+ """
261
+ Generate a unique container name based on image name and timestamp.
262
+
263
+ Args:
264
+ image: Docker image name
265
+
266
+ Returns:
267
+ A unique container name
268
+ """
269
+ import time
270
+
271
+ clean_image = image.split("/")[-1].split(":")[0]
272
+ timestamp = int(time.time() * 1000)
273
+ return f"{clean_image}-{timestamp}"
274
+
275
+
276
+ class KubernetesProvider(ContainerProvider):
277
+ """
278
+ Container provider for Kubernetes clusters.
279
+
280
+ This provider creates pods in a Kubernetes cluster and exposes them
281
+ via services or port-forwarding.
282
+
283
+ Example:
284
+ >>> provider = KubernetesProvider(namespace="envtorch-dev")
285
+ >>> base_url = provider.start_container("echo-env:latest")
286
+ >>> # Pod running in k8s, accessible via service or port-forward
287
+ >>> provider.stop_container()
288
+ """
289
+ pass
src/core/containers/test_local_docker_provider.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ End-to-end test for LocalDockerProvider.
4
+
5
+ This script tests the complete flow:
6
+ 1. Start a container using LocalDockerProvider
7
+ 2. Wait for it to be ready
8
+ 3. Make HTTP requests to test the environment
9
+ 4. Clean up the container
10
+ """
11
+
12
+ import sys
13
+ from pathlib import Path
14
+
15
+ # Add src to path
16
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
17
+
18
+ import requests
19
+
20
+ from core.containers.runtime import LocalDockerProvider
21
+
22
+ # TODO: Remove this test or make it a functional test since this will be tested in e2e test for echo env
23
+ def test_local_docker_provider():
24
+ """Test LocalDockerProvider end-to-end."""
25
+ print("=" * 60)
26
+ print("LocalDockerProvider End-to-End Test")
27
+ print("=" * 60)
28
+ print()
29
+
30
+ provider = None
31
+
32
+ try:
33
+ # Step 1: Create provider
34
+ print("Step 1: Creating LocalDockerProvider...")
35
+ provider = LocalDockerProvider()
36
+ print("โœ“ Provider created\n")
37
+
38
+ # Step 2: Start container
39
+ print("Step 2: Starting echo-env container...")
40
+ base_url = provider.start_container("echo-env:latest")
41
+ print(f"โœ“ Container started at: {base_url}")
42
+ if provider._container_id:
43
+ print(f" Container ID: {provider._container_id[:12]}...")
44
+ if provider._container_name:
45
+ print(f" Container name: {provider._container_name}\n")
46
+
47
+ # Step 3: Wait for ready
48
+ print("Step 3: Waiting for container to be ready...")
49
+ provider.wait_for_ready(base_url, timeout_s=30.0)
50
+ print("โœ“ Container is ready!\n")
51
+
52
+ # Step 4: Test health endpoint
53
+ print("Step 4: Testing /health endpoint...")
54
+ response = requests.get(f"{base_url}/health")
55
+ print(f" Status: {response.status_code}")
56
+ print(f" Response: {response.json()}")
57
+ assert response.status_code == 200
58
+ assert response.json()["status"] == "healthy"
59
+ print("โœ“ Health check passed\n")
60
+
61
+ # Step 5: Test reset endpoint
62
+ print("Step 5: Testing /reset endpoint...")
63
+ response = requests.post(
64
+ f"{base_url}/reset",
65
+ json={},
66
+ headers={"Content-Type": "application/json"},
67
+ )
68
+ print(f" Status: {response.status_code}")
69
+ data = response.json()
70
+ print(f" Message: {data['observation']['echoed_message']}")
71
+ print(f" Reward: {data['reward']}")
72
+ print(f" Done: {data['done']}")
73
+ assert response.status_code == 200
74
+ assert data["observation"]["echoed_message"] == "Echo environment ready!"
75
+ print("โœ“ Reset test passed\n")
76
+
77
+ # Step 6: Test step endpoint
78
+ print("Step 6: Testing /step endpoint...")
79
+ response = requests.post(
80
+ f"{base_url}/step",
81
+ json={"action": {"message": "Hello from LocalDockerProvider!"}},
82
+ headers={"Content-Type": "application/json"},
83
+ )
84
+ print(f" Status: {response.status_code}")
85
+ data = response.json()
86
+ print(f" Echoed: {data['observation']['echoed_message']}")
87
+ print(f" Length: {data['observation']['message_length']}")
88
+ print(f" Reward: {data['reward']}")
89
+ assert response.status_code == 200
90
+ assert data["observation"]["echoed_message"] == "Hello from LocalDockerProvider!"
91
+ assert data["observation"]["message_length"] == 31
92
+ print("โœ“ Step test passed\n")
93
+
94
+ # Step 7: Test state endpoint
95
+ print("Step 7: Testing /state endpoint...")
96
+ response = requests.get(f"{base_url}/state")
97
+ print(f" Status: {response.status_code}")
98
+ data = response.json()
99
+ print(f" Episode ID: {data['episode_id']}")
100
+ print(f" Step count: {data['step_count']}")
101
+ assert response.status_code == 200
102
+ assert data["step_count"] == 1 # One step from above
103
+ print("โœ“ State test passed\n")
104
+
105
+ # Step 8: Multiple steps
106
+ print("Step 8: Testing multiple steps...")
107
+ for i in range(3):
108
+ response = requests.post(
109
+ f"{base_url}/step",
110
+ json={"action": {"message": f"Message {i+1}"}},
111
+ headers={"Content-Type": "application/json"},
112
+ )
113
+ assert response.status_code == 200
114
+ print(f" Step {i+1}: โœ“")
115
+
116
+ # Check state updated
117
+ response = requests.get(f"{base_url}/state")
118
+ data = response.json()
119
+ assert data["step_count"] == 4 # 1 + 3 more steps
120
+ print(f" Final step count: {data['step_count']}")
121
+ print("โœ“ Multiple steps test passed\n")
122
+
123
+ print("=" * 60)
124
+ print("โœ“ All tests passed!")
125
+ print("=" * 60)
126
+ print()
127
+
128
+ return True
129
+
130
+ except Exception as e:
131
+ print(f"\nโŒ Test failed: {e}")
132
+ import traceback
133
+ traceback.print_exc()
134
+ return False
135
+
136
+ finally:
137
+ # Step 9: Cleanup
138
+ if provider is not None:
139
+ print("\nStep 9: Cleaning up container...")
140
+ try:
141
+ provider.stop_container()
142
+ print("โœ“ Container stopped and removed\n")
143
+ except Exception as e:
144
+ print(f"โš ๏ธ Cleanup warning: {e}\n")
145
+
146
+
147
+ def test_provider_with_custom_port():
148
+ """Test provider with custom port."""
149
+ print("=" * 60)
150
+ print("LocalDockerProvider with Custom Port Test")
151
+ print("=" * 60)
152
+ print()
153
+
154
+ provider = None
155
+
156
+ try:
157
+ provider = LocalDockerProvider()
158
+
159
+ print("Starting container on custom port 8123...")
160
+ base_url = provider.start_container("echo-env:latest", port=8123)
161
+ print(f"โœ“ Started at: {base_url}")
162
+ assert ":8123" in base_url
163
+
164
+ print("Waiting for ready...")
165
+ provider.wait_for_ready(base_url)
166
+ print("โœ“ Ready!")
167
+
168
+ print("Testing health...")
169
+ response = requests.get(f"{base_url}/health")
170
+ assert response.status_code == 200
171
+ print("โœ“ Health check passed")
172
+
173
+ print("\nโœ“ Custom port test passed!\n")
174
+ return True
175
+
176
+ except Exception as e:
177
+ print(f"\nโŒ Test failed: {e}")
178
+ return False
179
+
180
+ finally:
181
+ if provider is not None:
182
+ provider.stop_container()
183
+ print("โœ“ Cleaned up\n")
184
+
185
+
186
+ def test_provider_with_env_vars():
187
+ """Test provider with environment variables."""
188
+ print("=" * 60)
189
+ print("LocalDockerProvider with Environment Variables Test")
190
+ print("=" * 60)
191
+ print()
192
+
193
+ provider = None
194
+
195
+ try:
196
+ provider = LocalDockerProvider()
197
+
198
+ print("Starting container with environment variables...")
199
+ base_url = provider.start_container(
200
+ "echo-env:latest",
201
+ env_vars={"DEBUG": "true", "LOG_LEVEL": "info"}
202
+ )
203
+ print(f"โœ“ Started at: {base_url}")
204
+
205
+ print("Waiting for ready...")
206
+ provider.wait_for_ready(base_url)
207
+ print("โœ“ Ready!")
208
+
209
+ print("Testing health...")
210
+ response = requests.get(f"{base_url}/health")
211
+ assert response.status_code == 200
212
+ print("โœ“ Health check passed")
213
+
214
+ print("\nโœ“ Environment variables test passed!\n")
215
+ return True
216
+
217
+ except Exception as e:
218
+ print(f"\nโŒ Test failed: {e}")
219
+ return False
220
+
221
+ finally:
222
+ if provider is not None:
223
+ provider.stop_container()
224
+ print("โœ“ Cleaned up\n")
225
+
226
+
227
+ if __name__ == "__main__":
228
+ print()
229
+ print("๐Ÿณ LocalDockerProvider Test Suite")
230
+ print()
231
+
232
+ results = []
233
+
234
+ # Run basic test
235
+ results.append(("Basic End-to-End", test_local_docker_provider()))
236
+
237
+ # Run custom port test
238
+ results.append(("Custom Port", test_provider_with_custom_port()))
239
+
240
+ # Run environment variables test
241
+ results.append(("Environment Variables", test_provider_with_env_vars()))
242
+
243
+ # Summary
244
+ print("=" * 60)
245
+ print("Test Summary")
246
+ print("=" * 60)
247
+ for name, passed in results:
248
+ status = "โœ“ PASSED" if passed else "โœ— FAILED"
249
+ print(f"{name:25} {status}")
250
+ print("=" * 60)
251
+
252
+ all_passed = all(result for _, result in results)
253
+ if all_passed:
254
+ print("\n๐ŸŽ‰ All tests passed!")
255
+ exit(0)
256
+ else:
257
+ print("\nโŒ Some tests failed")
258
+ exit(1)
src/core/env_server/__init__.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Core environment interfaces and types."""
8
+
9
+ from .base_transforms import CompositeTransform, NullTransform
10
+ from .http_server import HTTPEnvServer, create_app, create_fastapi_app
11
+ from .interfaces import Environment, Message, ModelTokenizer, Transform
12
+ from .types import Action, Observation, State
13
+ from .web_interface import create_web_interface_app, WebInterfaceManager
14
+
15
# Public API of the package, grouped by the submodule each name comes from.
__all__ = [
    # interfaces
    "Environment",
    "Transform",
    "Message",
    "ModelTokenizer",
    # types
    "Action",
    "Observation",
    "State",
    # base_transforms
    "CompositeTransform",
    "NullTransform",
    # http_server
    "HTTPEnvServer",
    "create_app",
    "create_fastapi_app",
    # web_interface
    "create_web_interface_app",
    "WebInterfaceManager",
]
src/core/env_server/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (898 Bytes). View file
 
src/core/env_server/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (940 Bytes). View file
 
src/core/env_server/__pycache__/base_transforms.cpython-311.pyc ADDED
Binary file (1.67 kB). View file
 
src/core/env_server/__pycache__/base_transforms.cpython-313.pyc ADDED
Binary file (1.57 kB). View file
 
src/core/env_server/__pycache__/http_server.cpython-311.pyc ADDED
Binary file (9.2 kB). View file
 
src/core/env_server/__pycache__/http_server.cpython-313.pyc ADDED
Binary file (7.14 kB). View file
 
src/core/env_server/__pycache__/interfaces.cpython-311.pyc ADDED
Binary file (5.22 kB). View file
 
src/core/env_server/__pycache__/interfaces.cpython-313.pyc ADDED
Binary file (4.68 kB). View file
 
src/core/env_server/__pycache__/types.cpython-311.pyc ADDED
Binary file (2.39 kB). View file
 
src/core/env_server/__pycache__/types.cpython-313.pyc ADDED
Binary file (2.1 kB). View file
 
src/core/env_server/__pycache__/web_interface.cpython-311.pyc ADDED
Binary file (29.9 kB). View file
 
src/core/env_server/base_transforms.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Base transform implementations for composing environment-specific transforms."""
8
+
9
+ from .interfaces import Transform
10
+ from .types import Observation
11
+
12
+
13
class CompositeTransform(Transform):
    """Chain several transforms so they behave as one.

    Args:
        transforms: Transforms to apply, in order. The list is stored as
            given (not copied).
    """

    def __init__(self, transforms: list[Transform]):
        self.transforms = transforms

    def __call__(self, observation: Observation) -> Observation:
        """Feed ``observation`` through every transform, first to last.

        Each transform receives the output of the previous one; the output
        of the last transform is returned. With no transforms the input is
        returned as is.
        """
        result = observation
        for stage in self.transforms:
            result = stage(result)
        return result
23
+
24
+
25
class NullTransform(Transform):
    """Identity transform: hands every observation back untouched."""

    def __call__(self, observation: Observation) -> Observation:
        # Nothing to compute; the very same object is returned.
        return observation
src/core/env_server/http_server.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ HTTP server wrapper for Environment instances.
9
+
10
+ This module provides utilities to wrap any Environment subclass and expose it
11
+ over HTTP endpoints that HTTPEnvClient can consume.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import os
17
+ from dataclasses import asdict
18
+ from typing import Any, Dict, Type
19
+
20
+ from .interfaces import Environment
21
+ from .types import Action, Observation
22
+ from fastapi import Body, FastAPI
23
+
24
class HTTPEnvServer:
    """
    HTTP server wrapper for Environment instances.

    This class wraps an Environment and exposes its reset(), step(), and state
    members as HTTP endpoints compatible with HTTPEnvClient, plus a /health
    probe.

    The server performs:
    - Action deserialization: Converts JSON dict to Action subclass
    - Observation serialization: Converts Observation subclass to JSON dict

    Example:
        >>> from core.env_server import HTTPEnvServer
        >>> from envs.coding_env.server import CodeExecutionEnvironment
        >>> from envs.coding_env.models import CodeAction, CodeObservation
        >>>
        >>> env = CodeExecutionEnvironment()
        >>> server = HTTPEnvServer(env, CodeAction, CodeObservation)
        >>>
        >>> # Register routes with FastAPI
        >>> from fastapi import FastAPI
        >>> app = FastAPI()
        >>> server.register_routes(app)
    """

    def __init__(
        self,
        env: Environment,
        action_cls: Type[Action],
        observation_cls: Type[Observation],
    ):
        """
        Initialize HTTP server wrapper.

        Args:
            env: The Environment instance to wrap
            action_cls: The Action subclass this environment expects
            observation_cls: The Observation subclass this environment returns
        """
        self.env = env
        self.action_cls = action_cls
        self.observation_cls = observation_cls

    def register_routes(self, app: Any) -> None:
        """
        Register HTTP routes on a FastAPI application.

        Adds POST /reset, POST /step, GET /state and GET /health.

        Args:
            app: FastAPI application instance

        Raises:
            TypeError: If ``app`` is not a FastAPI instance.
        """

        if not isinstance(app, FastAPI):
            raise TypeError("app must be a FastAPI instance")

        @app.post("/reset")
        async def reset(request: Dict[str, Any] = Body(default={})) -> Dict[str, Any]:
            """Reset endpoint - returns initial observation."""
            # TODO: Handle seed, episode_id from request if provided
            observation = self.env.reset()
            return self._serialize_observation(observation)

        @app.post("/step")
        async def step(request: Dict[str, Any]) -> Dict[str, Any]:
            """Step endpoint - executes action and returns observation."""
            action_data = request.get("action", {})
            # TODO: Handle timeout_s, request_id, episode_id from request if provided

            # Deserialize action
            action = self._deserialize_action(action_data)

            # Execute step
            observation = self.env.step(action)

            # Return serialized observation
            return self._serialize_observation(observation)

        @app.get("/state")
        async def get_state() -> Dict[str, Any]:
            """State endpoint - returns current environment state."""
            state = self.env.state
            return asdict(state)

        @app.get("/health")
        async def health() -> Dict[str, str]:
            """Health check endpoint."""
            return {"status": "healthy"}

    def _deserialize_action(self, action_data: Dict[str, Any]) -> Action:
        """
        Convert JSON dict to Action instance.

        Args:
            action_data: Dictionary containing action data. It is not
                modified; a shallow copy is taken before ``metadata`` is
                split off.

        Returns:
            Action instance built from the remaining keys, with ``metadata``
            assigned afterwards (``{}`` when the key is absent).

        Note:
            This is a simple implementation. Subclasses may need to override
            for more complex deserialization logic.
        """
        # Work on a copy so the caller's payload keeps its "metadata" key.
        payload = dict(action_data)
        metadata = payload.pop("metadata", {})
        action = self.action_cls(**payload)
        action.metadata = metadata
        return action

    def _serialize_observation(self, observation: Observation) -> Dict[str, Any]:
        """
        Convert Observation instance to JSON-compatible dict.

        Args:
            observation: Observation instance

        Returns:
            Dictionary compatible with HTTPEnvClient._parse_result()

        The format matches what HTTPEnvClient expects:
        {
            "observation": {...},  # Observation fields
            "reward": float | None,
            "done": bool,
        }
        """
        obs_dict = asdict(observation)

        # Extract reward and done (these are part of StepResult on client side)
        reward = obs_dict.pop("reward", None)
        done = obs_dict.pop("done", False)
        obs_dict.pop("metadata", None)  # Remove metadata from observation

        # Return in HTTPEnvClient expected format
        return {
            "observation": obs_dict,
            "reward": reward,
            "done": done,
        }
161
+
162
def create_app(
    env: Environment,
    action_cls: Type[Action],
    observation_cls: Type[Observation],
) -> Any:
    """
    Build the FastAPI application, optionally with the web interface.

    The ENABLE_WEB_INTERFACE environment variable selects the variant: the
    values "true", "1" and "yes" (compared case-insensitively) turn the web
    interface on; anything else, or an unset variable, yields the plain app.

    Args:
        env: The Environment instance to serve
        action_cls: The Action subclass this environment expects
        observation_cls: The Observation subclass this environment returns

    Returns:
        FastAPI application instance, with or without the web interface
    """
    flag = os.getenv("ENABLE_WEB_INTERFACE", "false").lower()

    if flag not in ("true", "1", "yes"):
        # Standard FastAPI app without web interface
        return create_fastapi_app(env, action_cls, observation_cls)

    # The web interface module is imported only on this path.
    from .web_interface import create_web_interface_app

    return create_web_interface_app(env, action_cls, observation_cls)
194
+
195
+
196
def create_fastapi_app(
    env: Environment,
    action_cls: Type[Action],
    observation_cls: Type[Observation],
) -> Any:
    """
    Create a FastAPI application with routes for the given environment.

    Args:
        env: The Environment instance to serve
        action_cls: The Action subclass this environment expects
        observation_cls: The Observation subclass this environment returns

    Returns:
        FastAPI application instance with routes registered

    Raises:
        ImportError: If FastAPI cannot be imported.

    Example:
        >>> from envs.coding_env.server import CodeExecutionEnvironment
        >>> from envs.coding_env.models import CodeAction, CodeObservation
        >>>
        >>> env = CodeExecutionEnvironment()
        >>> app = create_fastapi_app(env, CodeAction, CodeObservation)
        >>>
        >>> # Run with: uvicorn module:app --host 0.0.0.0 --port 8000
    """
    try:
        from fastapi import FastAPI
    except ImportError as exc:
        # Chain the original error so the real cause stays in the traceback.
        raise ImportError(
            "FastAPI is required. Install with: pip install fastapi uvicorn"
        ) from exc

    app = FastAPI(title="Environment HTTP Server")
    server = HTTPEnvServer(env, action_cls, observation_cls)
    server.register_routes(app)
    return app
src/core/env_server/interfaces.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from abc import ABC, abstractmethod
8
+ from typing import Any, Protocol, TypedDict
9
+
10
+ from .types import Action, Observation, State
11
+
12
+
13
class Message(TypedDict):
    """One turn of a conversation.

    Shaped like an entry of the Huggingface chat template format.
    """

    # Speaker of the turn.
    role: str
    # Text of the turn.
    content: str
21
+
22
+
23
class ModelTokenizer(Protocol):
    """Structural interface for tokenizers with chat-template support.

    Any object offering these two methods can be used by chat-based
    environments; Huggingface transformers tokenizers are the intended fit.
    """

    def apply_chat_template(
        self,
        conversation: list[Message],
        tokenize: bool = True,
        return_tensors: str | None = None,
        **kwargs: Any,
    ) -> Any:
        """Format a conversation with the chat template, optionally tokenizing.

        Args:
            conversation: Messages, each carrying 'role' and 'content'
            tokenize: Whether the formatted text should be tokenized
            return_tensors: Tensor format to return ('pt' for PyTorch)
            **kwargs: Additional arguments

        Returns:
            The formatted, and optionally tokenized, conversation
        """
        ...

    def decode(
        self, token_ids: Any, skip_special_tokens: bool = False, **kwargs: Any
    ) -> str:
        """Turn token IDs back into text.

        Args:
            token_ids: Token IDs to decode
            skip_special_tokens: Whether special tokens are left out of the text
            **kwargs: Additional arguments

        Returns:
            The decoded string
        """
        ...
65
+
66
+
67
class Transform(ABC):
    """Observation-to-observation hook for rewards, metrics and other edits.

    Modeled on the TorchRL pattern: a transform receives an observation and
    returns an observation, possibly modified. That keeps reward computation
    and observation augmentation pluggable.
    """

    @abstractmethod
    def __call__(self, observation: Observation) -> Observation:
        """Transform an observation.

        Args:
            observation: The input observation

        Returns:
            The transformed observation
        """
86
+
87
+
88
class Environment(ABC):
    """Abstract base of every environment server (Gym/Gymnasium-style API).

    Subclasses implement ``reset``, ``step`` and the ``state`` property.

    Args:
        transform: Optional transform to apply to observations
    """

    def __init__(self, transform: Transform | None = None):
        self.transform = transform

    @abstractmethod
    def reset(self) -> Observation:
        """Reset the environment and return initial observation."""

    @abstractmethod
    def step(self, action: Action) -> Observation:
        """Take a step in the environment."""

    @property
    @abstractmethod
    def state(self) -> State:
        """Get the current environment state."""

    def _apply_transform(self, observation: Observation) -> Observation:
        """Run the configured transform, or pass the observation through."""
        transform = self.transform
        if transform is None:
            return observation
        return transform(observation)
src/core/env_server/types.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import Any, Dict, List, Optional, Union
9
+
10
+
11
# Type alias: a single numeric or boolean value.
Scalar = Union[int, float, bool]
13
+
14
+
15
+ @dataclass(kw_only=True)
16
+ class Action:
17
+ """Base class for all environment actions."""
18
+
19
+ metadata: Dict[str, Any] = field(default_factory=dict)
20
+
21
+
22
+ @dataclass(kw_only=True)
23
+ class Observation:
24
+ """Base class for all environment observations."""
25
+
26
+ done: bool = False
27
+ reward: Union[bool, int, float, None] = None
28
+ metadata: Dict[str, Any] = field(default_factory=dict)
29
+
30
+
31
@dataclass
class State:
    """Root type for the state an environment reports."""

    # Identifier of the current episode; None by default.
    episode_id: Optional[str] = None
    # Step counter; starts at 0.
    step_count: int = 0
37
+
38
+
39
@dataclass
class CodeExecResult:
    """Outcome of running code: captured stdout, stderr and the exit code."""

    # Captured standard output.
    stdout: str
    # Captured standard error.
    stderr: str
    # Exit code of the run.
    exit_code: int
src/core/env_server/web_interface.py ADDED
@@ -0,0 +1,764 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Web interface for OpenEnv environments.
9
+
10
+ This module provides a web-based interface for interacting with OpenEnv environments,
11
+ including a two-pane layout for HumanAgent interaction and state observation.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import time
18
+ from dataclasses import asdict, dataclass
19
+ from typing import Any, Dict, List, Optional, Type
20
+ from datetime import datetime
21
+
22
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request
23
+ from fastapi.responses import HTMLResponse, FileResponse
24
+ from fastapi.staticfiles import StaticFiles
25
+ from pydantic import BaseModel
26
+
27
+ from .interfaces import Environment
28
+ from .types import Action, Observation, State
29
+
30
+
31
@dataclass
class ActionLog:
    """Record of one executed action and what the environment returned."""

    # When the entry was created (string form).
    timestamp: str
    # The action, as a plain dict.
    action: Dict[str, Any]
    # The resulting observation, as a plain dict.
    observation: Dict[str, Any]
    # Reward carried by the observation, if any.
    reward: Optional[float]
    # Whether the observation marked the episode as finished.
    done: bool
    # Environment step counter at the time of the entry.
    step_count: int
40
+
41
+
42
@dataclass
class EpisodeState:
    """Snapshot of the running episode, as shown by the web interface."""

    # Identifier of the episode, or None.
    episode_id: Optional[str]
    # Number of steps recorded for the episode.
    step_count: int
    # Latest observation as a plain dict, or None.
    current_observation: Optional[Dict[str, Any]]
    # Log entries of the actions taken.
    action_logs: List[ActionLog]
    # Reset marker; True by default.
    is_reset: bool = True
50
+
51
+
52
class WebInterfaceManager:
    """Manages the web interface for an environment.

    Keeps an ``EpisodeState`` mirror of the wrapped environment, tracks the
    connected WebSocket clients, and broadcasts the mirror to them after
    every reset and step.
    """

    def __init__(
        self,
        env: Environment,
        action_cls: Type[Action],
        observation_cls: Type[Observation],
    ):
        """
        Args:
            env: The Environment instance to drive
            action_cls: The Action subclass this environment expects
            observation_cls: The Observation subclass this environment returns
        """
        self.env = env
        self.action_cls = action_cls
        self.observation_cls = observation_cls
        self.episode_state = EpisodeState(
            episode_id=None,
            step_count=0,
            current_observation=None,
            action_logs=[]
        )
        self.connected_clients: List[WebSocket] = []

    async def connect_websocket(self, websocket: WebSocket):
        """Accept a new WebSocket client, register it, and broadcast the state."""
        await websocket.accept()
        self.connected_clients.append(websocket)

        # Send current state to the new client
        await self._send_state_update()

    async def disconnect_websocket(self, websocket: WebSocket):
        """Unregister a WebSocket client; a no-op if it is not registered."""
        if websocket in self.connected_clients:
            self.connected_clients.remove(websocket)

    async def _send_state_update(self):
        """Send current state to all connected clients.

        Clients whose send fails are dropped from the registry. Values that
        JSON cannot encode natively are sent in their ``str()`` form, so an
        unusual observation field is never mistaken for a dead connection.
        """
        if not self.connected_clients:
            return

        # Serialize once, outside the per-client error handling.
        message = json.dumps(
            {
                "type": "state_update",
                "episode_state": asdict(self.episode_state),
            },
            default=str,
        )

        # Iterate over a snapshot: the registry can change while awaiting.
        stale_clients = []
        for client in list(self.connected_clients):
            try:
                await client.send_text(message)
            except Exception:
                # A failed send is treated as a gone client. Task
                # cancellation is deliberately not caught here.
                stale_clients.append(client)

        # Remove disconnected clients (they may already have been removed).
        for client in stale_clients:
            if client in self.connected_clients:
                self.connected_clients.remove(client)

    async def reset_environment(self) -> Dict[str, Any]:
        """Reset the environment, refresh the episode state, and broadcast it.

        Returns:
            Dict with the full observation (as a dict), its reward, and its
            done flag.
        """
        observation = self.env.reset()
        state = self.env.state

        # Update episode state
        self.episode_state.episode_id = state.episode_id
        self.episode_state.step_count = 0
        self.episode_state.current_observation = asdict(observation)
        self.episode_state.action_logs = []
        self.episode_state.is_reset = True

        # Send state update
        await self._send_state_update()

        return {
            "observation": asdict(observation),
            "reward": observation.reward,
            "done": observation.done,
        }

    async def step_environment(self, action_data: Dict[str, Any]) -> Dict[str, Any]:
        """Execute one step, log it, refresh the episode state, and broadcast it.

        Args:
            action_data: Dictionary containing action data

        Returns:
            Dict with the full observation (as a dict), its reward, and its
            done flag.
        """
        # Deserialize action
        action = self._deserialize_action(action_data)

        # Execute step
        observation = self.env.step(action)
        state = self.env.state

        # Create action log
        action_log = ActionLog(
            timestamp=datetime.now().isoformat(),
            action=asdict(action),
            observation=asdict(observation),
            reward=observation.reward,
            done=observation.done,
            step_count=state.step_count
        )

        # Update episode state
        self.episode_state.episode_id = state.episode_id
        self.episode_state.step_count = state.step_count
        self.episode_state.current_observation = asdict(observation)
        self.episode_state.action_logs.append(action_log)
        self.episode_state.is_reset = False

        # Send state update
        await self._send_state_update()

        return {
            "observation": asdict(observation),
            "reward": observation.reward,
            "done": observation.done,
        }

    def get_state(self) -> Dict[str, Any]:
        """Get current environment state as a plain dict."""
        state = self.env.state
        return asdict(state)

    def _deserialize_action(self, action_data: Dict[str, Any]) -> Action:
        """Convert JSON dict to Action instance.

        ``action_data`` is not modified: ``metadata`` is split off a shallow
        copy and assigned to the action after construction (``{}`` when the
        key is absent).
        """
        payload = dict(action_data)
        metadata = payload.pop("metadata", {})
        action = self.action_cls(**payload)
        action.metadata = metadata
        return action
174
+
175
+
176
def create_web_interface_app(
    env: Environment,
    action_cls: Type[Action],
    observation_cls: Type[Observation],
) -> FastAPI:
    """
    Create a FastAPI application with web interface for the given environment.

    The app returned by ``create_fastapi_app`` is extended with GET /web,
    the /ws WebSocket, POST /web/reset, POST /web/step and GET /web/state.

    Args:
        env: The Environment instance to serve
        action_cls: The Action subclass this environment expects
        observation_cls: The Observation subclass this environment returns

    Returns:
        FastAPI application instance with web interface
    """
    from .http_server import create_fastapi_app

    # Create the base environment app
    app = create_fastapi_app(env, action_cls, observation_cls)

    # Create web interface manager
    web_manager = WebInterfaceManager(env, action_cls, observation_cls)

    # Add web interface routes
    @app.get("/web", response_class=HTMLResponse)
    async def web_interface():
        """Serve the web interface."""
        return get_web_interface_html(action_cls)

    @app.websocket("/ws")
    async def websocket_endpoint(websocket: WebSocket):
        """WebSocket endpoint for real-time updates."""
        try:
            await web_manager.connect_websocket(websocket)
            while True:
                # Keep connection alive; incoming text is ignored.
                await websocket.receive_text()
        except WebSocketDisconnect:
            # Normal termination: the client went away.
            pass
        finally:
            # Always unregister, so a handler that ends for any other reason
            # does not leave a stale client behind. Unregistering is a no-op
            # for a client that is not registered.
            await web_manager.disconnect_websocket(websocket)

    @app.post("/web/reset")
    async def web_reset():
        """Reset endpoint for web interface."""
        return await web_manager.reset_environment()

    @app.post("/web/step")
    async def web_step(request: Dict[str, Any]):
        """Step endpoint for web interface."""
        action_data = request.get("action", {})
        return await web_manager.step_environment(action_data)

    @app.get("/web/state")
    async def web_state():
        """State endpoint for web interface."""
        return web_manager.get_state()

    return app
234
+
235
+
236
+ def get_web_interface_html(action_cls: Type[Action]) -> str:
237
+ """Generate the HTML for the web interface."""
238
+
239
+ # Get action fields for dynamic form generation
240
+ action_fields = []
241
+ if hasattr(action_cls, '__dataclass_fields__'):
242
+ for field_name, field_info in action_cls.__dataclass_fields__.items():
243
+ if field_name != 'metadata':
244
+ field_type = field_info.type
245
+ if field_type == str:
246
+ input_type = "text"
247
+ elif field_type == int:
248
+ input_type = "number"
249
+ elif field_type == float:
250
+ input_type = "number"
251
+ elif field_type == bool:
252
+ input_type = "checkbox"
253
+ else:
254
+ input_type = "text"
255
+
256
+ action_fields.append({
257
+ 'name': field_name,
258
+ 'type': input_type,
259
+ 'required': field_info.default is field_info.default_factory
260
+ })
261
+
262
+ return f"""
263
+ <!DOCTYPE html>
264
+ <html lang="en">
265
+ <head>
266
+ <meta charset="UTF-8">
267
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
268
+ <title>OpenEnv Web Interface</title>
269
+ <style>
270
+ * {{
271
+ margin: 0;
272
+ padding: 0;
273
+ box-sizing: border-box;
274
+ }}
275
+
276
+ body {{
277
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
278
+ background-color: #f5f5f5;
279
+ height: 100vh;
280
+ overflow: hidden;
281
+ }}
282
+
283
+ .container {{
284
+ display: flex;
285
+ height: 100vh;
286
+ }}
287
+
288
+ .left-pane {{
289
+ width: 50%;
290
+ background: white;
291
+ border-right: 1px solid #e0e0e0;
292
+ display: flex;
293
+ flex-direction: column;
294
+ }}
295
+
296
+ .right-pane {{
297
+ width: 50%;
298
+ background: #fafafa;
299
+ display: flex;
300
+ flex-direction: column;
301
+ }}
302
+
303
+ .pane-header {{
304
+ padding: 20px;
305
+ border-bottom: 1px solid #e0e0e0;
306
+ background: #f8f9fa;
307
+ font-weight: 600;
308
+ font-size: 16px;
309
+ }}
310
+
311
+ .pane-content {{
312
+ flex: 1;
313
+ padding: 20px;
314
+ overflow-y: auto;
315
+ }}
316
+
317
+ .action-form {{
318
+ background: white;
319
+ border: 1px solid #e0e0e0;
320
+ border-radius: 8px;
321
+ padding: 20px;
322
+ margin-bottom: 20px;
323
+ }}
324
+
325
+ .form-group {{
326
+ margin-bottom: 15px;
327
+ }}
328
+
329
+ .form-group label {{
330
+ display: block;
331
+ margin-bottom: 5px;
332
+ font-weight: 500;
333
+ color: #333;
334
+ }}
335
+
336
+ .form-group input, .form-group textarea {{
337
+ width: 100%;
338
+ padding: 8px 12px;
339
+ border: 1px solid #ddd;
340
+ border-radius: 4px;
341
+ font-size: 14px;
342
+ }}
343
+
344
+ .form-group input:focus, .form-group textarea:focus {{
345
+ outline: none;
346
+ border-color: #007bff;
347
+ box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.25);
348
+ }}
349
+
350
+ .btn {{
351
+ background: #007bff;
352
+ color: white;
353
+ border: none;
354
+ padding: 10px 20px;
355
+ border-radius: 4px;
356
+ cursor: pointer;
357
+ font-size: 14px;
358
+ margin-right: 10px;
359
+ margin-bottom: 10px;
360
+ }}
361
+
362
+ .btn:hover {{
363
+ background: #0056b3;
364
+ }}
365
+
366
+ .btn:disabled {{
367
+ background: #6c757d;
368
+ cursor: not-allowed;
369
+ }}
370
+
371
+ .btn-secondary {{
372
+ background: #6c757d;
373
+ }}
374
+
375
+ .btn-secondary:hover {{
376
+ background: #545b62;
377
+ }}
378
+
379
+ .state-display {{
380
+ background: white;
381
+ border: 1px solid #e0e0e0;
382
+ border-radius: 8px;
383
+ padding: 15px;
384
+ margin-bottom: 20px;
385
+ }}
386
+
387
+ .state-item {{
388
+ margin-bottom: 8px;
389
+ }}
390
+
391
+ .state-label {{
392
+ font-weight: 500;
393
+ color: #666;
394
+ }}
395
+
396
+ .state-value {{
397
+ color: #333;
398
+ font-family: monospace;
399
+ }}
400
+
401
+ .logs-container {{
402
+ background: white;
403
+ border: 1px solid #e0e0e0;
404
+ border-radius: 8px;
405
+ padding: 15px;
406
+ max-height: 400px;
407
+ overflow-y: auto;
408
+ }}
409
+
410
+ .log-entry {{
411
+ border-bottom: 1px solid #f0f0f0;
412
+ padding: 10px 0;
413
+ }}
414
+
415
+ .log-entry:last-child {{
416
+ border-bottom: none;
417
+ }}
418
+
419
+ .log-timestamp {{
420
+ font-size: 12px;
421
+ color: #666;
422
+ margin-bottom: 5px;
423
+ }}
424
+
425
+ .log-action {{
426
+ background: #e3f2fd;
427
+ padding: 8px;
428
+ border-radius: 4px;
429
+ margin-bottom: 5px;
430
+ font-family: monospace;
431
+ font-size: 12px;
432
+ }}
433
+
434
+ .log-observation {{
435
+ background: #f3e5f5;
436
+ padding: 8px;
437
+ border-radius: 4px;
438
+ font-family: monospace;
439
+ font-size: 12px;
440
+ }}
441
+
442
+ .log-reward {{
443
+ font-weight: 600;
444
+ color: #28a745;
445
+ }}
446
+
447
+ .log-done {{
448
+ font-weight: 600;
449
+ color: #dc3545;
450
+ }}
451
+
452
+ .status-indicator {{
453
+ display: inline-block;
454
+ width: 8px;
455
+ height: 8px;
456
+ border-radius: 50%;
457
+ margin-right: 8px;
458
+ }}
459
+
460
+ .status-connected {{
461
+ background: #28a745;
462
+ }}
463
+
464
+ .status-disconnected {{
465
+ background: #dc3545;
466
+ }}
467
+
468
+ .json-display {{
469
+ background: #f8f9fa;
470
+ border: 1px solid #e9ecef;
471
+ border-radius: 4px;
472
+ padding: 10px;
473
+ font-family: monospace;
474
+ font-size: 12px;
475
+ white-space: pre-wrap;
476
+ max-height: 200px;
477
+ overflow-y: auto;
478
+ }}
479
+ </style>
480
+ </head>
481
+ <body>
482
+ <div class="container">
483
+ <!-- Left Pane: HumanAgent Interface -->
484
+ <div class="left-pane">
485
+ <div class="pane-header">
486
+ <span class="status-indicator status-disconnected" id="connection-status"></span>
487
+ HumanAgent Interface
488
+ </div>
489
+ <div class="pane-content">
490
+ <!-- Action Form -->
491
+ <div class="action-form">
492
+ <h3>Take Action</h3>
493
+ <form id="action-form">
494
+ {_generate_action_form_fields(action_fields)}
495
+ <button type="submit" class="btn" id="step-btn">Step</button>
496
+ </form>
497
+ </div>
498
+
499
+ <!-- Control Buttons -->
500
+ <div style="margin-bottom: 20px;">
501
+ <button class="btn btn-secondary" id="reset-btn">Reset Environment</button>
502
+ <button class="btn btn-secondary" id="state-btn">Get State</button>
503
+ </div>
504
+
505
+ <!-- Current State Display -->
506
+ <div class="state-display">
507
+ <h3>Current State</h3>
508
+ <div id="current-state">
509
+ <div class="state-item">
510
+ <span class="state-label">Status:</span>
511
+ <span class="state-value" id="env-status">Not initialized</span>
512
+ </div>
513
+ <div class="state-item">
514
+ <span class="state-label">Episode ID:</span>
515
+ <span class="state-value" id="episode-id">-</span>
516
+ </div>
517
+ <div class="state-item">
518
+ <span class="state-label">Step Count:</span>
519
+ <span class="state-value" id="step-count">0</span>
520
+ </div>
521
+ </div>
522
+ </div>
523
+ </div>
524
+ </div>
525
+
526
+ <!-- Right Pane: State Observer -->
527
+ <div class="right-pane">
528
+ <div class="pane-header">
529
+ State Observer
530
+ </div>
531
+ <div class="pane-content">
532
+ <!-- Current Observation -->
533
+ <div class="state-display">
534
+ <h3>Current Observation</h3>
535
+ <div id="current-observation" class="json-display">
536
+ No observation yet
537
+ </div>
538
+ </div>
539
+
540
+ <!-- Action Logs -->
541
+ <div class="logs-container">
542
+ <h3>Action History</h3>
543
+ <div id="action-logs">
544
+ No actions taken yet
545
+ </div>
546
+ </div>
547
+ </div>
548
+ </div>
549
+ </div>
550
+
551
+ <script>
552
+ class OpenEnvWebInterface {{
553
+ constructor() {{
554
+ this.ws = null;
555
+ this.isConnected = false;
556
+ this.init();
557
+ }}
558
+
559
+ init() {{
560
+ this.connectWebSocket();
561
+ this.setupEventListeners();
562
+ }}
563
+
564
+ connectWebSocket() {{
565
+ const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
566
+ const wsUrl = `${{protocol}}//${{window.location.host}}/ws`;
567
+
568
+ this.ws = new WebSocket(wsUrl);
569
+
570
+ this.ws.onopen = () => {{
571
+ this.isConnected = true;
572
+ this.updateConnectionStatus(true);
573
+ console.log('WebSocket connected');
574
+ }};
575
+
576
+ this.ws.onmessage = (event) => {{
577
+ const data = JSON.parse(event.data);
578
+ if (data.type === 'state_update') {{
579
+ this.updateUI(data.episode_state);
580
+ }}
581
+ }};
582
+
583
+ this.ws.onclose = () => {{
584
+ this.isConnected = false;
585
+ this.updateConnectionStatus(false);
586
+ console.log('WebSocket disconnected');
587
+ // Attempt to reconnect after 3 seconds
588
+ setTimeout(() => this.connectWebSocket(), 3000);
589
+ }};
590
+
591
+ this.ws.onerror = (error) => {{
592
+ console.error('WebSocket error:', error);
593
+ }};
594
+ }}
595
+
596
+ setupEventListeners() {{
597
+ // Action form submission
598
+ document.getElementById('action-form').addEventListener('submit', (e) => {{
599
+ e.preventDefault();
600
+ this.submitAction();
601
+ }});
602
+
603
+ // Reset button
604
+ document.getElementById('reset-btn').addEventListener('click', () => {{
605
+ this.resetEnvironment();
606
+ }});
607
+
608
+ // State button
609
+ document.getElementById('state-btn').addEventListener('click', () => {{
610
+ this.getState();
611
+ }});
612
+ }}
613
+
614
+ async submitAction() {{
615
+ const formData = new FormData(document.getElementById('action-form'));
616
+ const action = {{}};
617
+
618
+ // Collect form data
619
+ for (const [key, value] of formData.entries()) {{
620
+ if (value !== '') {{
621
+ action[key] = value;
622
+ }}
623
+ }}
624
+
625
+ try {{
626
+ const response = await fetch('/web/step', {{
627
+ method: 'POST',
628
+ headers: {{ 'Content-Type': 'application/json' }},
629
+ body: JSON.stringify({{ action }})
630
+ }});
631
+
632
+ if (!response.ok) {{
633
+ throw new Error(`HTTP error! status: ${{response.status}}`);
634
+ }}
635
+
636
+ const result = await response.json();
637
+ console.log('Step result:', result);
638
+ }} catch (error) {{
639
+ console.error('Error submitting action:', error);
640
+ alert('Error submitting action: ' + error.message);
641
+ }}
642
+ }}
643
+
644
+ async resetEnvironment() {{
645
+ try {{
646
+ const response = await fetch('/web/reset', {{
647
+ method: 'POST',
648
+ headers: {{ 'Content-Type': 'application/json' }}
649
+ }});
650
+
651
+ if (!response.ok) {{
652
+ throw new Error(`HTTP error! status: ${{response.status}}`);
653
+ }}
654
+
655
+ const result = await response.json();
656
+ console.log('Reset result:', result);
657
+ }} catch (error) {{
658
+ console.error('Error resetting environment:', error);
659
+ alert('Error resetting environment: ' + error.message);
660
+ }}
661
+ }}
662
+
663
+ async getState() {{
664
+ try {{
665
+ const response = await fetch('/web/state');
666
+ const state = await response.json();
667
+ console.log('Current state:', state);
668
+ alert('Current state: ' + JSON.stringify(state, null, 2));
669
+ }} catch (error) {{
670
+ console.error('Error getting state:', error);
671
+ alert('Error getting state: ' + error.message);
672
+ }}
673
+ }}
674
+
675
+ updateConnectionStatus(connected) {{
676
+ const indicator = document.getElementById('connection-status');
677
+ if (connected) {{
678
+ indicator.className = 'status-indicator status-connected';
679
+ }} else {{
680
+ indicator.className = 'status-indicator status-disconnected';
681
+ }}
682
+ }}
683
+
684
+ updateUI(episodeState) {{
685
+ // Update current state
686
+ document.getElementById('env-status').textContent =
687
+ episodeState.is_reset ? 'Reset' : 'Running';
688
+ document.getElementById('episode-id').textContent =
689
+ episodeState.episode_id || '-';
690
+ document.getElementById('step-count').textContent =
691
+ episodeState.step_count.toString();
692
+
693
+ // Update current observation
694
+ const observationDiv = document.getElementById('current-observation');
695
+ if (episodeState.current_observation) {{
696
+ observationDiv.textContent = JSON.stringify(
697
+ episodeState.current_observation, null, 2
698
+ );
699
+ }} else {{
700
+ observationDiv.textContent = 'No observation yet';
701
+ }}
702
+
703
+ // Update action logs
704
+ const logsDiv = document.getElementById('action-logs');
705
+ if (episodeState.action_logs.length === 0) {{
706
+ logsDiv.innerHTML = 'No actions taken yet';
707
+ }} else {{
708
+ logsDiv.innerHTML = episodeState.action_logs.map(log => `
709
+ <div class="log-entry">
710
+ <div class="log-timestamp">${{log.timestamp}} (Step ${{log.step_count}})</div>
711
+ <div class="log-action">Action: ${{JSON.stringify(log.action, null, 2)}}</div>
712
+ <div class="log-observation">Observation: ${{JSON.stringify(log.observation, null, 2)}}</div>
713
+ <div>
714
+ <span class="log-reward">Reward: ${{log.reward !== null ? log.reward : 'None'}}</span>
715
+ ${{log.done ? '<span class="log-done">DONE</span>' : ''}}
716
+ </div>
717
+ </div>
718
+ `).join('');
719
+ }}
720
+ }}
721
+ }}
722
+
723
+ // Initialize the web interface when the page loads
724
+ document.addEventListener('DOMContentLoaded', () => {{
725
+ new OpenEnvWebInterface();
726
+ }});
727
+ </script>
728
+ </body>
729
+ </html>
730
+ """.replace('{_generate_action_form_fields(action_fields)}', _generate_action_form_fields(action_fields))
731
+
732
+
733
+ def _generate_action_form_fields(action_fields: List[Dict[str, Any]]) -> str:
734
+ """Generate HTML form fields for action input."""
735
+ if not action_fields:
736
+ return '<p>No action fields available</p>'
737
+
738
+ fields_html = []
739
+ for field in action_fields:
740
+ if field['type'] == 'checkbox':
741
+ fields_html.append(f'''
742
+ <div class="form-group">
743
+ <label>
744
+ <input type="checkbox" name="{field['name']}" value="true">
745
+ {field['name']}
746
+ </label>
747
+ </div>
748
+ ''')
749
+ elif field['type'] == 'text' and 'message' in field['name'].lower():
750
+ fields_html.append(f'''
751
+ <div class="form-group">
752
+ <label for="{field['name']}">{field['name']}:</label>
753
+ <textarea name="{field['name']}" id="{field['name']}" rows="3" placeholder="Enter {field['name']}..."></textarea>
754
+ </div>
755
+ ''')
756
+ else:
757
+ fields_html.append(f'''
758
+ <div class="form-group">
759
+ <label for="{field['name']}">{field['name']}:</label>
760
+ <input type="{field['type']}" name="{field['name']}" id="{field['name']}" placeholder="Enter {field['name']}..." {"required" if field['required'] else ""}>
761
+ </div>
762
+ ''')
763
+
764
+ return '\n'.join(fields_html)
src/core/http_env_client.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ core/http_env_client.py
3
+ Minimal HTTP-based environment client.
4
+ - Talks to a single env worker exposing: POST /reset, POST /step
5
+
6
+ Future hooks (commented below) for:
7
+ - episode_id, seed on reset
8
+ - request_id on step
9
+ - custom headers (auth/trace)
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from abc import ABC, abstractmethod
15
+ from typing import TYPE_CHECKING, Any, Dict, Generic, Optional, Type, TypeVar
16
+ from .containers.runtime import LocalDockerProvider
17
+ import requests
18
+
19
+ from .types import StepResult
20
+
21
+ if TYPE_CHECKING:
22
+ from .containers.runtime import ContainerProvider
23
+
24
# Type variables: action type, observation type, and the concrete client
# subclass returned by the ``from_docker_image`` factory.
ActT = TypeVar("ActT")
ObsT = TypeVar("ObsT")
EnvClientT = TypeVar("EnvClientT", bound="HTTPEnvClient")


class HTTPEnvClient(ABC, Generic[ActT, ObsT]):
    """
    Abstract HTTP client for a single environment server.

    The server is expected to expose ``POST /reset``, ``POST /step`` and
    ``GET /state``. Subclasses supply the (de)serialization hooks
    ``_step_payload``, ``_parse_result`` and ``_parse_state``.
    """

    def __init__(
        self,
        base_url: str,
        request_timeout_s: float = 15.0,
        default_headers: Optional[Dict[str, str]] = None,
        provider: Optional["ContainerProvider"] = None,
    ):
        """
        Args:
            base_url: Root URL of the environment server; a trailing slash is
                stripped.
            request_timeout_s: Timeout applied to every HTTP request, in seconds.
            default_headers: Headers sent with every request (e.g. auth/trace).
            provider: Container provider backing this client, if any. When set,
                ``close()`` stops its container.
        """
        self._base = base_url.rstrip("/")
        self._timeout = float(request_timeout_s)
        self._http = requests.Session()
        # Copy so later mutations of the caller's dict do not leak into the client.
        self._headers = dict(default_headers) if default_headers else {}
        self._provider = provider

    @classmethod
    def from_docker_image(
        cls: Type[EnvClientT],
        image: str,
        provider: Optional["ContainerProvider"] = None,
    ) -> EnvClientT:
        """
        Create an environment client by spinning up a Docker container locally.

        This is a development utility that:
        1. Starts a Docker container from the specified image
        2. Waits for the server to be ready
        3. Creates and returns a client instance connected to the container

        Note: The container keeps running until ``close()`` is called on the
        returned client (or it is stopped manually). If the server never becomes
        ready, the container that was just started is stopped before the error
        is re-raised.

        Args:
            image: Docker image name to run (e.g., "echo-env:latest")
            provider: Container provider to use (defaults to LocalDockerProvider)

        Returns:
            An instance of the client class connected to the running container

        Raises:
            Exception: Whatever the provider raises while starting the container
                or waiting for readiness.

        Example:
            >>> from envs.coding_env.client import CodingEnv
            >>> from envs.coding_env.models import CodeAction
            >>>
            >>> # Create environment from image
            >>> env = CodingEnv.from_docker_image("coding-env:latest")
            >>>
            >>> # Use the environment
            >>> result = env.reset()
            >>> print(result.observation)
            >>>
            >>> step_result = env.step(CodeAction(code="print('hello')"))
            >>> print(step_result.observation.stdout)
            >>>
            >>> # Cleanup (stops the container)
            >>> env.close()
        """

        # Use default provider if none provided
        if provider is None:
            provider = LocalDockerProvider()

        # 1. Start container
        base_url = provider.start_container(image)

        # 2. Wait for server to be ready; do not leak the container on failure
        try:
            provider.wait_for_ready(base_url)
        except BaseException:
            try:
                provider.stop_container()
            except Exception:
                # Best-effort cleanup: surface the original readiness failure.
                pass
            raise

        # 3. Create and return client instance with provider reference
        return cls(base_url=base_url, provider=provider)

    @abstractmethod
    def _step_payload(self, action: ActT) -> dict:
        """Convert an Action object to the JSON body expected by the env server."""
        raise NotImplementedError

    @abstractmethod
    def _parse_result(self, payload: dict) -> StepResult[ObsT]:
        """Convert a JSON response from the env server to StepResult[ObsT]."""
        raise NotImplementedError

    @abstractmethod
    def _parse_state(self, payload: dict) -> Any:
        """Convert a JSON response from the state endpoint to a State object."""
        raise NotImplementedError

    # ---------- Environment Server Interface Methods ----------
    def reset(self) -> StepResult[ObsT]:
        """
        Reset the environment via ``POST /reset``.

        Returns:
            The parsed result of the reset, as produced by ``_parse_result``.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        body: Dict[str, Any] = {}
        # TODO: later:
        # body["seed"] = seed
        # body["episode_id"] = episode_id
        r = self._http.post(
            f"{self._base}/reset",
            json=body,
            headers=self._headers,
            timeout=self._timeout,
        )
        r.raise_for_status()
        return self._parse_result(r.json())

    def step(self, action: ActT) -> StepResult[ObsT]:
        """
        Apply one action via ``POST /step``.

        Args:
            action: Action to send; serialized with ``_step_payload``.

        Returns:
            The parsed step result, as produced by ``_parse_result``.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        body: Dict[str, Any] = {
            "action": self._step_payload(action),
            # The request timeout is forwarded to the server as whole seconds.
            "timeout_s": int(self._timeout),
        }
        # TODO: later:
        # body["request_id"] = str(uuid.uuid4())
        # body["episode_id"] = current_episode_id
        r = self._http.post(
            f"{self._base}/step",
            json=body,
            headers=self._headers,
            timeout=self._timeout,
        )
        r.raise_for_status()
        return self._parse_result(r.json())

    def state(self) -> Any:
        """
        Get the current environment state from the server.

        Returns:
            State object with environment state information (e.g., episode_id, step_count)

        Raises:
            requests.HTTPError: If the server answers with an error status.

        Example:
            >>> client = EchoEnv.from_docker_image("echo-env:latest")
            >>> result = client.reset()
            >>> state = client.state()
            >>> print(state.episode_id)
            >>> print(state.step_count)
        """
        r = self._http.get(
            f"{self._base}/state",
            headers=self._headers,
            timeout=self._timeout,
        )
        r.raise_for_status()
        return self._parse_state(r.json())

    def close(self) -> None:
        """
        Close the environment and clean up resources.

        Releases the underlying HTTP session. If this client was created via
        from_docker_image() (or was given a provider), this also stops the
        associated container.
        """
        try:
            # Release pooled connections held by the session.
            self._http.close()
        finally:
            # Stop the container even if closing the session failed.
            if self._provider is not None:
                self._provider.stop_container()
src/core/tools/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Core tools for code execution and other utilities."""
8
+
9
+ from .local_python_executor import PyExecutor
10
+
11
+ __all__ = ["PyExecutor"]
src/core/tools/local_python_executor.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Local Python Executor.
9
+
10
+ This module provides functionality for executing Python code locally by wrapping
11
+ the smolagents LocalPythonExecutor.
12
+ """
13
+
14
+ from smolagents import LocalPythonExecutor
15
+
16
+ from core.env_server.types import CodeExecResult
17
+
18
+
19
class PyExecutor:
    """
    Thin adapter over smolagents' ``LocalPythonExecutor``.

    Runs a Python source string through the wrapped executor and reports the
    outcome as a ``CodeExecResult`` carrying ``stdout``, ``stderr`` and
    ``exit_code``.

    Args:
        additional_imports: Extra module names passed to the executor as
            ``additional_authorized_imports``,
            e.g. ``["numpy", "pandas", "matplotlib"]``.

    Example:
        >>> # Default imports only
        >>> executor = PyExecutor()
        >>> result = executor.run("print('Hello, World!')")
        >>> print(result.stdout)  # "Hello, World!\\n"
        >>> print(result.exit_code)  # 0
        >>>
        >>> # Authorizing extra modules
        >>> executor = PyExecutor(additional_imports=["numpy", "pandas"])
        >>> result = executor.run("import numpy as np\\nprint(np.array([1, 2, 3]))")
        >>> print(result.stdout)  # "[1 2 3]\\n"
    """

    def __init__(self, additional_imports: list[str] | None = None):
        """
        Build the wrapped ``LocalPythonExecutor``.

        Args:
            additional_imports: Module names to authorize for import in executed
                code; ``None`` is treated as an empty list.
        """
        extra_modules = [] if additional_imports is None else additional_imports
        self._executor = LocalPythonExecutor(
            additional_authorized_imports=extra_modules
        )
        # Sending an empty tool set makes BASE_PYTHON_TOOLS (including print) available.
        self._executor.send_tools({})

    def run(self, code: str) -> CodeExecResult:
        """
        Execute ``code`` and package the outcome.

        Args:
            code: Python source to execute.

        Returns:
            ``CodeExecResult`` with the executor's logs as ``stdout`` and
            ``exit_code`` 0 on success; on any exception, empty ``stdout``, the
            exception text as ``stderr`` and ``exit_code`` 1.

        Example:
            >>> executor = PyExecutor()
            >>> result = executor.run("x = 5 + 3\\nprint(x)")
            >>> print(result.stdout)  # "8\\n"
            >>> print(result.exit_code)  # 0
            >>>
            >>> # Failure is reported, not raised
            >>> result = executor.run("1 / 0")
            >>> print(result.exit_code)  # 1
            >>> print(result.stderr)  # Contains error message
        """
        try:
            # The executor's result exposes the captured print output as ``logs``.
            outcome = self._executor(code)
            return CodeExecResult(stdout=outcome.logs, stderr="", exit_code=0)
        except Exception as exc:
            # Syntax errors, forbidden operations and runtime errors all land here.
            return CodeExecResult(stdout="", stderr=str(exc), exit_code=1)
src/core/types.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Type definitions for OpenEnv
2
+ from dataclasses import dataclass
3
+ from typing import Any, Generic, Optional, TypeVar
4
+
5
# Placeholder for the concrete observation type carried by a StepResult
ObsT = TypeVar("ObsT")


@dataclass
class StepResult(Generic[ObsT]):
    """
    Outcome of a single environment step.

    Attributes:
        observation: What the environment reports after the action was applied.
        reward: Scalar reward for the step, or None when no reward is given.
        done: True once the episode has ended.
    """

    observation: ObsT
    reward: Optional[float] = None
    done: bool = False
src/envs/openspiel_env/README.md ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OpenSpiel Environment
2
+
3
+ Integration of OpenSpiel games with the OpenEnv framework. OpenSpiel (https://github.com/google-deepmind/open_spiel) is DeepMind's collection of 70+ game environments for RL research.
4
+
5
+ ## Supported Games
6
+
7
+ This environment supports 6 games across different categories:
8
+
9
+ ### Single-Player Games (No Opponent)
10
+ 1. **Catch** - Move horizontally to catch a falling ball
11
+ 2. **Cliff Walking** - Navigate grid without falling off cliff (Sutton & Barto benchmark)
12
+ 3. **2048** - Classic tile-merging puzzle game
13
+ 4. **Blackjack** - Simplified blackjack (HIT/STAND only)
14
+
15
+ ### Multi-Player Games (with Bot Opponent)
16
+ 5. **Tic-Tac-Toe** - Classic 3x3 game
17
+ 6. **Kuhn Poker** - 2-player simplified poker (game theory benchmark)
18
+
19
+ ## Architecture
20
+
21
+ ```
22
+ ┌────────────────────────────────────┐
23
+ │ RL Training Code (Client)          │
24
+ │   OpenSpielEnv.step(action)        │
25
+ └──────────────┬─────────────────────┘
26
+                │ HTTP
27
+ ┌──────────────▼─────────────────────┐
28
+ │ FastAPI Server (Docker)            │
29
+ │   OpenSpielEnvironment             │
30
+ │     ├─ Wraps rl_environment.Env    │
31
+ │     ├─ Agent controls player 0     │
32
+ │     └─ Opponent: Random/Fixed      │
33
+ └────────────────────────────────────┘
34
+ ```
35
+
36
+ ## Installation & Usage
37
+
38
+ ### Option 1: Local Development (without Docker)
39
+
40
+ **Requirements:**
41
+ - OpenSpiel must be installed (see https://github.com/google-deepmind/open_spiel)
42
+ - Python 3.11+
43
+
44
+ ```python
45
+ from envs.openspiel_env import OpenSpielEnv, OpenSpielAction
46
+
47
+ # Start local server manually
48
+ # python -m envs.openspiel_env.server.app
49
+
50
+ # Connect to local server
51
+ env = OpenSpielEnv(base_url="http://localhost:8000")
52
+
53
+ # Reset environment
54
+ result = env.reset()
55
+ print(f"Initial state: {result.observation.info_state}")
56
+ print(f"Legal actions: {result.observation.legal_actions}")
57
+
58
+ # Take actions
59
+ for _ in range(10):
60
+ action_id = result.observation.legal_actions[0] # Choose first legal action
61
+ result = env.step(OpenSpielAction(action_id=action_id))
62
+ print(f"Reward: {result.reward}, Done: {result.done}")
63
+ if result.done:
64
+ break
65
+
66
+ # Cleanup
67
+ env.close()
68
+ ```
69
+
70
+ ### Option 2: Docker (Recommended)
71
+
72
+ **Build Docker image:**
73
+
74
+ ```bash
75
+ cd OpenEnv
76
+ docker build -f src/envs/openspiel_env/server/Dockerfile -t openspiel-env:latest .
77
+ ```
78
+
79
+ **Run specific games:**
80
+
81
+ ```bash
82
+ # Catch (default)
83
+ docker run -p 8000:8000 openspiel-env:latest
84
+
85
+ # Tic-Tac-Toe with random opponent
86
+ docker run -p 8000:8000 -e OPENSPIEL_GAME=tic_tac_toe openspiel-env:latest
87
+
88
+ # Kuhn Poker
89
+ docker run -p 8000:8000 -e OPENSPIEL_GAME=kuhn_poker openspiel-env:latest
90
+
91
+ # 2048
92
+ docker run -p 8000:8000 -e OPENSPIEL_GAME=2048 openspiel-env:latest
93
+ ```
94
+
95
+ **Use with from_docker_image():**
96
+
97
+ ```python
98
+ from envs.openspiel_env import OpenSpielEnv, OpenSpielAction
99
+
100
+ # Automatically starts container
101
+ env = OpenSpielEnv.from_docker_image("openspiel-env:latest")
102
+
103
+ result = env.reset()
104
+ result = env.step(OpenSpielAction(action_id=0))
105
+
106
+ env.close() # Stops container
107
+ ```
108
+
109
+ ## Game-Specific Information
110
+
111
+ ### 1. Catch
112
+ - **Type**: Single-player
113
+ - **Action Space**: 3 actions (left, stay, right)
114
+ - **Observation**: 5x5 grid flattened (25 dimensions)
115
+ - **Reward**: +1 for catching ball, 0 otherwise
116
+ - **Episode Length**: ~10 steps
117
+
118
+ ```python
119
+ env = OpenSpielEnv.from_docker_image("openspiel-env:latest")
120
+ # Or set OPENSPIEL_GAME=catch
121
+ ```
122
+
123
+ ### 2. Tic-Tac-Toe
124
+ - **Type**: 2-player turn-based, perfect information
125
+ - **Players**: Agent (X) vs Random Bot (O)
126
+ - **Action Space**: 9 positions
127
+ - **Observation**: 27 dimensions (3x3 board + game state)
128
+ - **Reward**: +1 win, -1 loss, 0 draw/mid-game
129
+
130
+ ```bash
131
+ # Set environment variable or run directly
132
+ docker run -p 8000:8000 -e OPENSPIEL_GAME=tic_tac_toe openspiel-env:latest
133
+ ```
134
+
135
+ ### 3. Kuhn Poker
136
+ - **Type**: 2-player turn-based, imperfect information
137
+ - **Players**: Agent vs Random Bot
138
+ - **Action Space**: 2 actions (pass/fold, bet/call)
139
+ - **Observation**: 6 dimensions (card + betting history)
140
+ - **Reward**: Pot winnings (typically -1, 0, +1, +2)
141
+ - **Notes**: THE benchmark for imperfect-information RL
142
+
143
+ ```bash
144
+ docker run -p 8000:8000 -e OPENSPIEL_GAME=kuhn_poker openspiel-env:latest
145
+ ```
146
+
147
+ ### 4. Cliff Walking
148
+ - **Type**: Single-player grid world
149
+ - **Action Space**: 4 actions (up, down, left, right)
150
+ - **Observation**: Position encoding
151
+ - **Reward**: -1 per step, -100 for falling off cliff
152
+ - **Notes**: Classic RL benchmark from Sutton & Barto
153
+
154
+ ```bash
155
+ docker run -p 8000:8000 -e OPENSPIEL_GAME=cliff_walking openspiel-env:latest
156
+ ```
157
+
158
+ ### 5. 2048
159
+ - **Type**: Single-player puzzle
160
+ - **Action Space**: 4 actions (up, down, left, right)
161
+ - **Observation**: 4x4 grid with tile values
162
+ - **Reward**: Points from merging tiles
163
+ - **Notes**: Stochastic tile spawning
164
+
165
+ ```bash
166
+ docker run -p 8000:8000 -e OPENSPIEL_GAME=2048 openspiel-env:latest
167
+ ```
168
+
169
+ ### 6. Blackjack
170
+ - **Type**: Single-player vs dealer
171
+ - **Action Space**: 2 actions (HIT, STAND)
172
+ - **Observation**: Player hand + dealer's visible card
173
+ - **Reward**: +1 win, -1 loss, 0 draw
174
+ - **Notes**: Simplified version, no double/split
175
+
176
+ ```bash
177
+ docker run -p 8000:8000 -e OPENSPIEL_GAME=blackjack openspiel-env:latest
178
+ ```
179
+
180
+ ## Configuration
181
+
182
+ ### Environment Variables
183
+
184
+ - `OPENSPIEL_GAME`: Game name (default: "catch")
185
+ - `OPENSPIEL_AGENT_PLAYER`: Player ID for agent (default: 0)
186
+ - `OPENSPIEL_OPPONENT_POLICY`: Opponent policy for multi-player games
187
+ - `random`: Uniform random (default)
188
+ - `first`: Always picks first legal action
189
+ - `last`: Always picks last legal action
190
+
191
+ ### Example: Tic-Tac-Toe with Fixed Opponent
192
+
193
+ ```bash
194
+ docker run -p 8000:8000 \
195
+ -e OPENSPIEL_GAME=tic_tac_toe \
196
+ -e OPENSPIEL_OPPONENT_POLICY=first \
197
+ openspiel-env:latest
198
+ ```
199
+
200
+ ## API Reference
201
+
202
+ ### OpenSpielAction
203
+
204
+ ```python
205
+ @dataclass
206
+ class OpenSpielAction(Action):
207
+ action_id: int # Action to take
208
+ game_name: str = "catch" # Game name
209
+ game_params: Dict[str, Any] = field(default_factory=dict) # Optional game parameters
210
+ ```
211
+
212
+ ### OpenSpielObservation
213
+
214
+ ```python
215
+ @dataclass
216
+ class OpenSpielObservation(Observation):
217
+ info_state: List[float] # Agent's information state
218
+ legal_actions: List[int] # Legal action IDs
219
+ game_phase: str # "initial", "playing", "terminal"
220
+ current_player_id: int # Current player (-1 for simultaneous)
221
+ opponent_last_action: Optional[int] # Last opponent action (if available)
222
+ done: bool # Episode finished
223
+ reward: Optional[float] # Reward for last action
224
+ ```
225
+
226
+ ### OpenSpielState
227
+
228
+ ```python
229
+ @dataclass
230
+ class OpenSpielState(State):
231
+ episode_id: str # Unique episode ID
232
+ step_count: int # Number of steps
233
+ game_name: str # Game name
234
+ agent_player: int # Agent's player ID
235
+ opponent_policy: str # Opponent policy name
236
+ num_players: int # Total players
237
+ ```
238
+
239
+ ## Testing
240
+
241
+ ### Automated Testing (All 6 Games)
242
+
243
+ **Quick test of all games in Docker:**
244
+ ```bash
245
+ ./test_docker_all_games.sh
246
+ ```
247
+
248
+ This automated script will:
249
+ - Build and run Docker containers for each game
250
+ - Test reset, step, and state APIs
251
+ - Verify episode completion
252
+ - Report pass/fail for all 6 games
253
+
254
+ **Expected output:**
255
+ ```
256
+ ========================================
257
+ OpenSpiel Docker Integration Test
258
+ ========================================
259
+
260
+ โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”
261
+ Testing: catch
262
+ โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”
263
+ ๐Ÿณ Starting Docker container...
264
+ โณ Waiting for server to be ready...
265
+ โœ“ Server ready (2s)
266
+ ๐ŸŽฎ Running Python client test...
267
+ โœ“ PASSED - Episode completed successfully
268
+
269
+ [... tests all 6 games ...]
270
+
271
+ ========================================
272
+ Test Summary
273
+ ========================================
274
+
275
+ ✓ catch
276
+ ✓ tic_tac_toe
277
+ ✓ kuhn_poker
278
+ ✓ cliff_walking
279
+ ✓ 2048
280
+ ✓ blackjack
281
+
282
+ Total: 6 passed, 0 failed out of 6 games
283
+
284
+ ========================================
285
+ All tests PASSED! 🎉
286
+ ========================================
287
+ ```
288
+
289
+ ### Manual Testing
290
+
291
+ ```bash
292
+ # Local (requires OpenSpiel installed)
293
+ python -m pytest src/envs/openspiel_env/
294
+
295
+ # Docker build
296
+ docker build -f src/envs/openspiel_env/server/Dockerfile -t openspiel-env:latest .
297
+
298
+ # Run specific game
299
+ docker run -p 8000:8000 openspiel-env:latest
300
+
301
+ # Test from another terminal
302
+ python3 examples/openspiel_simple.py
303
+ ```
304
+
305
+ ## Development
306
+
307
+ ### Adding New Games
308
+
309
+ To add support for more OpenSpiel games:
310
+
311
+ 1. Verify the game works with `rl_environment.Environment`
312
+ 2. Test with different opponent policies if multi-player
313
+ 3. Document game-specific configuration
314
+ 4. Add example script
315
+
316
+ ## Limitations
317
+
318
+ - **Simultaneous-move games**: Only agent_player=0 supported
319
+ - **Multi-agent training**: Single agent only (no self-play yet)
320
+ - **Opponent policies**: Random and fixed only (no MCTS yet)
321
+ - **Build time**: Docker image takes ~5-10 minutes to build (compiles C++)
322
+
323
+ ## Future Work
324
+
325
+ - MCTS opponent policies
326
+ - Self-play support (multiple agents)
327
+ - More games (Chess, Go, Poker Hold'em)
328
+ - Faster build with pre-built OpenSpiel base image
329
+ - Game-specific reward shaping options
330
+
331
+ ## References
332
+
333
+ - [OpenSpiel Paper (2019)](https://arxiv.org/abs/1908.09453)
334
+ - [OpenSpiel GitHub](https://github.com/google-deepmind/open_spiel)
335
+ - [OpenSpiel Documentation](https://openspiel.readthedocs.io/)
src/envs/openspiel_env/__init__.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ OpenSpiel Environment Integration.
9
+
10
+ This module provides integration between OpenSpiel games and the OpenEnv framework.
11
+ OpenSpiel (https://github.com/google-deepmind/open_spiel) is DeepMind's collection
12
+ of environments and algorithms for research in RL in games.
13
+
14
+ Supported games:
15
+ - Catch (1P)
16
+ - Tic-Tac-Toe (2P)
17
+ - Kuhn Poker (2P, imperfect info)
18
+ - Cliff Walking (1P)
19
+ - 2048 (1P)
20
+ - Blackjack (1P)
21
+ """
22
+
23
+ from .client import OpenSpielEnv
24
+ from .models import OpenSpielAction, OpenSpielObservation, OpenSpielState
25
+
26
+ __all__ = ["OpenSpielEnv", "OpenSpielAction", "OpenSpielObservation", "OpenSpielState"]
src/envs/openspiel_env/client.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ OpenSpielEnv HTTP Client.
9
+
10
+ This module provides the client for connecting to an OpenSpiel Environment server
11
+ over HTTP.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import Any, Dict, Optional, TYPE_CHECKING
17
+
18
+ from core.http_env_client import HTTPEnvClient
19
+ from core.types import StepResult
20
+
21
+ from .models import OpenSpielAction, OpenSpielObservation, OpenSpielState
22
+
23
+ if TYPE_CHECKING:
24
+ from core.containers.runtime import ContainerProvider
25
+
26
+
27
class OpenSpielEnv(HTTPEnvClient[OpenSpielAction, OpenSpielObservation]):
    """
    Client-side handle for an OpenSpiel Environment exposed over HTTP.

    The class only supplies the (de)serialization hooks: it turns an
    ``OpenSpielAction`` into a JSON request body and converts the server's
    JSON replies into ``StepResult`` / ``OpenSpielState`` objects. The
    ``reset()``, ``step()`` and ``from_docker_image()`` entry points used in
    the examples below are not defined in this class and are expected to be
    inherited from ``HTTPEnvClient``.

    Example:
        >>> # Connect to a running server
        >>> client = OpenSpielEnv(base_url="http://localhost:8000")
        >>> result = client.reset()
        >>> print(result.observation.info_state)
        >>>
        >>> # Take an action
        >>> result = client.step(OpenSpielAction(action_id=1, game_name="catch"))
        >>> print(result.observation.reward)

    Example with Docker:
        >>> # Automatically start container and connect
        >>> client = OpenSpielEnv.from_docker_image("openspiel-env:latest")
        >>> result = client.reset()
        >>> result = client.step(OpenSpielAction(action_id=0))
    """

    def _step_payload(self, action: OpenSpielAction) -> Dict[str, Any]:
        """
        Serialize an action into the JSON body of a step request.

        Args:
            action: The OpenSpielAction to send.

        Returns:
            A dict with the keys ``action_id``, ``game_name`` and
            ``game_params`` copied from the action.
        """
        wire_fields = ("action_id", "game_name", "game_params")
        return {name: getattr(action, name) for name in wire_fields}

    def _parse_result(self, payload: Dict[str, Any]) -> StepResult[OpenSpielObservation]:
        """
        Build a StepResult from the server's JSON reply.

        Args:
            payload: Decoded JSON response from the server.

        Returns:
            StepResult wrapping an OpenSpielObservation. Missing keys fall
            back to defaults (empty lists, "playing", player 0, not done).
        """
        # "reward" and "done" live at the top level of the payload and are
        # mirrored into both the observation and the StepResult.
        step_reward = payload.get("reward")
        is_done = payload.get("done", False)
        obs_fields = payload.get("observation", {})

        parsed_obs = OpenSpielObservation(
            info_state=obs_fields.get("info_state", []),
            legal_actions=obs_fields.get("legal_actions", []),
            game_phase=obs_fields.get("game_phase", "playing"),
            current_player_id=obs_fields.get("current_player_id", 0),
            opponent_last_action=obs_fields.get("opponent_last_action"),
            done=is_done,
            reward=step_reward,
            metadata=obs_fields.get("metadata", {}),
        )
        return StepResult(observation=parsed_obs, reward=step_reward, done=is_done)

    def _parse_state(self, payload: Dict[str, Any]) -> OpenSpielState:
        """
        Build an OpenSpielState from the server's state reply.

        Args:
            payload: Decoded JSON response from the /state endpoint.

        Returns:
            OpenSpielState populated from the payload, with a default for
            every key the server left out.
        """
        # (field name, value used when the server omits the field)
        fallbacks = (
            ("episode_id", None),
            ("step_count", 0),
            ("game_name", "unknown"),
            ("agent_player", 0),
            ("opponent_policy", "random"),
            ("game_params", {}),
            ("num_players", 1),
        )
        return OpenSpielState(
            **{name: payload.get(name, default) for name, default in fallbacks}
        )
src/envs/openspiel_env/docker_issue.md ADDED
@@ -0,0 +1 @@
 
 
1
+ # port issue? fix proxy?
src/envs/openspiel_env/models.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Data models for OpenSpiel Environment.
9
+
10
+ This module defines the Action, Observation, and State types for OpenSpiel games.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass, field
16
+ from typing import Any, Dict, List, Optional
17
+
18
+ from core.env_server import Action, Observation, State
19
+
20
+
21
@dataclass
class OpenSpielAction(Action):
    """
    A single move submitted to an OpenSpiel environment.

    Attributes:
        action_id: Integer ID of the move to play; it should be one of the
            IDs listed in the observation's ``legal_actions``.
        game_name: OpenSpiel game the action is meant for
            (e.g., "catch", "tic_tac_toe"). Defaults to "catch".
        game_params: Optional game-specific parameters
            (e.g., {"rows": 8, "columns": 6}). Defaults to an empty dict.
    """

    # Required: there is no sensible default move.
    action_id: int
    game_name: str = "catch"
    # default_factory gives every instance its own dict.
    game_params: Dict[str, Any] = field(default_factory=dict)
34
+
35
+
36
@dataclass
class OpenSpielObservation(Observation):
    """
    What the agent sees after a reset or a step.

    In single-player games this is simply the game's observation. In
    multi-player games it is the view of the player the agent controls.

    Attributes:
        info_state: Information-state tensor for the agent, flattened to a
            list of floats. Holds everything the agent is allowed to know.
        legal_actions: Action IDs the agent may currently choose from.
        game_phase: Short label for the episode phase
            (e.g., "playing", "terminal").
        current_player_id: ID of the player to move
            (-1 for simultaneous, player ID otherwise).
        opponent_last_action: Most recent opponent action, or None when
            there is none to report.
    """

    info_state: List[float]
    legal_actions: List[int]
    game_phase: str = "playing"
    current_player_id: int = 0
    opponent_last_action: Optional[int] = None
58
+
59
+
60
@dataclass
class OpenSpielState(State):
    """
    Episode-level configuration and bookkeeping for an OpenSpiel environment.

    Attributes:
        game_name: OpenSpiel game being served. Defaults to "catch".
        agent_player: Player ID controlled by the agent (0 by default).
        opponent_policy: Name of the policy driving the other players
            ("random", "fixed", etc.).
        game_params: Game-specific parameters the game was created with.
        num_players: Total number of players in the game.
    """

    game_name: str = "catch"
    agent_player: int = 0
    opponent_policy: str = "random"
    # default_factory gives every instance its own dict.
    game_params: Dict[str, Any] = field(default_factory=dict)
    num_players: int = 1
src/envs/openspiel_env/server/Dockerfile ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
# Multi-stage build for OpenSpiel + OpenEnv
#
# Stage 1: build the OpenSpiel C++ Python bindings (pyspiel) from source.
# Python 3.11 is used so the compiled module matches the runtime image's
# interpreter (NOTE(review): confirm openenv-base ships Python 3.11).
FROM python:3.11 AS openspiel-builder

# Avoid interactive prompts during build
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=UTC

# Install build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    clang \
    cmake \
    curl \
    git \
    sudo \
    && rm -rf /var/lib/apt/lists/*

# OpenSpiel build directory (WORKDIR creates it when missing)
WORKDIR /repo

# Clone OpenSpiel. A shallow clone is enough for the build and keeps the
# /repo tree that is later copied into the runtime image small.
RUN git clone --depth 1 https://github.com/google-deepmind/open_spiel.git .

# Run OpenSpiel's installation script (downloads C++ dependencies)
RUN ./install.sh

# Install Python dependencies needed by the build
RUN pip3 install --no-cache-dir --upgrade setuptools testresources importlib_metadata
RUN pip3 install --no-cache-dir --upgrade -r requirements.txt cmake

# Build only the pyspiel target against this image's python3 with clang++
WORKDIR /repo/build
RUN cmake -DPython3_EXECUTABLE=$(which python3) -DCMAKE_CXX_COMPILER=$(which clang++) ../open_spiel
RUN make -j$(nproc) pyspiel

# Stage 2: Runtime image using published openenv-base
# Uses the standardized base image from GitHub Container Registry
# See: https://github.com/meta-pytorch/OpenEnv/pkgs/container/openenv-base
FROM ghcr.io/meta-pytorch/openenv-base:latest

# Copy OpenSpiel sources and build artifacts from the builder stage
# (COPY creates /repo when it does not exist)
COPY --from=openspiel-builder /repo /repo

# Install OpenSpiel Python requirements in runtime
WORKDIR /repo
RUN pip3 install --no-cache-dir --upgrade -r requirements.txt

# Copy OpenEnv core
WORKDIR /app
COPY src/core/ /app/src/core/

# Copy OpenSpiel environment
COPY src/envs/openspiel_env/ /app/src/envs/openspiel_env/

# Python path: OpenSpiel sources and the compiled pyspiel module first, then
# the OpenEnv sources copied above. Set once, explicitly, so the result does
# not depend on what the base image exported.
ENV PYTHONPATH=/repo:/repo/build/python:/app/src

# OpenSpiel-specific environment variables (can be overridden at runtime)
ENV OPENSPIEL_GAME=catch
ENV OPENSPIEL_AGENT_PLAYER=0
ENV OPENSPIEL_OPPONENT_POLICY=random

# Health check (assumes curl is available in the openenv-base image)
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Note: no EXPOSE here; port 8000 is expected to be declared by the base image.

# Run the FastAPI server (uvicorn is expected to be installed in the base image)
CMD ["uvicorn", "envs.openspiel_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
src/envs/openspiel_env/server/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Server-side implementation for OpenSpiel environments."""
src/envs/openspiel_env/server/app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ FastAPI application for the OpenSpiel Environment.
9
+
10
+ This module creates an HTTP server that exposes OpenSpiel games
11
+ over HTTP endpoints, making them compatible with HTTPEnvClient.
12
+
13
+ Usage:
14
+ # Development (with auto-reload):
15
+ uvicorn envs.openspiel_env.server.app:app --reload --host 0.0.0.0 --port 8000
16
+
17
+ # Production:
18
+ uvicorn envs.openspiel_env.server.app:app --host 0.0.0.0 --port 8000 --workers 4
19
+
20
+ # Or run directly:
21
+ python -m envs.openspiel_env.server.app
22
+
23
+ Environment variables:
24
+ OPENSPIEL_GAME: Game name to serve (default: "catch")
25
+ OPENSPIEL_AGENT_PLAYER: Agent player ID (default: 0)
26
+ OPENSPIEL_OPPONENT_POLICY: Opponent policy (default: "random")
27
+ """
28
+
29
+ import os
30
+
31
+ from core.env_server import create_app
32
+
33
+ from ..models import OpenSpielAction, OpenSpielObservation
34
+ from .openspiel_environment import OpenSpielEnvironment
35
+
36
# Server configuration comes from the process environment; each setting
# falls back to the default documented in the module docstring.
game_name = os.environ.get("OPENSPIEL_GAME", "catch")
# int() raises ValueError at import time if the variable is not an integer.
agent_player = int(os.environ.get("OPENSPIEL_AGENT_PLAYER", "0"))
opponent_policy = os.environ.get("OPENSPIEL_OPPONENT_POLICY", "random")

# One environment instance is created at import time and handed to the app.
env = OpenSpielEnvironment(
    game_name=game_name,
    agent_player=agent_player,
    opponent_policy=opponent_policy,
)

# FastAPI application object that uvicorn is pointed at
# (envs.openspiel_env.server.app:app).
app = create_app(env, OpenSpielAction, OpenSpielObservation)


if __name__ == "__main__":
    # Direct execution: serve on all interfaces, port 8000.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
src/envs/openspiel_env/server/build_docker.sh ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ # All rights reserved.
4
+ #
5
+ # This source code is licensed under the BSD-style license found in the
6
+ # LICENSE file in the root directory of this source tree.
7
+
8
# Script to build the OpenSpiel environment Docker image
# Usage: ./build_docker.sh [tag]
#
#   tag  Optional image tag (default: "latest"); the image is named
#        openspiel-env:<tag>.
#
# Note: the runtime base image (ghcr.io/meta-pytorch/openenv-base:latest) is
# referenced by the Dockerfile and is pulled by Docker when it is not
# available locally, so nothing has to be built beforehand.
# See: src/core/containers/images/README.md

set -e

TAG="${1:-latest}"
IMAGE_NAME="openspiel-env:${TAG}"

echo "🐳 Building OpenSpiel Environment Docker Image"
echo "================================================"
echo "Image: $IMAGE_NAME"
echo ""

# Directory containing this script (and the Dockerfile)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Navigate to OpenEnv root (4 levels up from server/); it is used as the
# Docker build context so the Dockerfile can COPY src/core and src/envs.
OPENENV_ROOT="$(cd "$SCRIPT_DIR/../../../.." && pwd)"

echo "📁 OpenEnv root: $OPENENV_ROOT"
echo ""

# Build OpenSpiel environment image
# Note: Docker will automatically pull ghcr.io/meta-pytorch/openenv-base:latest if needed
echo "⏳ Building (this may take 5-10 minutes due to OpenSpiel compilation)..."

# The build runs as the `if` condition on purpose: with `set -e` active, a
# failing stand-alone `docker build` would abort the script immediately and
# the failure message below would never be printed.
if docker build \
  -f "$SCRIPT_DIR/Dockerfile" \
  -t "$IMAGE_NAME" \
  "$OPENENV_ROOT"; then
  echo ""
  echo "✅ Build successful!"
  echo ""
  echo "🚀 Run with different games:"
  echo ""
  echo "  # Catch (default)"
  echo "  docker run -p 8000:8000 $IMAGE_NAME"
  echo ""
  echo "  # Tic-Tac-Toe"
  echo "  docker run -p 8000:8000 -e OPENSPIEL_GAME=tic_tac_toe $IMAGE_NAME"
  echo ""
  echo "  # Kuhn Poker"
  echo "  docker run -p 8000:8000 -e OPENSPIEL_GAME=kuhn_poker $IMAGE_NAME"
  echo ""
  echo "  # Cliff Walking"
  echo "  docker run -p 8000:8000 -e OPENSPIEL_GAME=cliff_walking $IMAGE_NAME"
  echo ""
  echo "  # 2048"
  echo "  docker run -p 8000:8000 -e OPENSPIEL_GAME=2048 $IMAGE_NAME"
  echo ""
  echo "  # Blackjack"
  echo "  docker run -p 8000:8000 -e OPENSPIEL_GAME=blackjack $IMAGE_NAME"
  echo ""
else
  echo "" >&2
  echo "❌ Build failed!" >&2
  exit 1
fi
src/envs/openspiel_env/server/openspiel_environment.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ OpenSpiel Environment Server Implementation.
9
+
10
+ This module wraps OpenSpiel's rl_environment.Environment and exposes it
11
+ via the OpenEnv Environment interface.
12
+ """
13
+
14
+ import uuid
15
+ from typing import Any, Dict
16
+
17
+ from core.env_server import Action, Environment, Observation
18
+
19
+ from ..models import OpenSpielAction, OpenSpielObservation, OpenSpielState
20
+ from .opponent_policies import get_opponent_policy, OpponentPolicy
21
+
22
+ # Import OpenSpiel
23
+ try:
24
+ from open_spiel.python import rl_environment
25
+ import pyspiel
26
+ except ImportError as e:
27
+ raise ImportError(
28
+ "OpenSpiel is not installed. "
29
+ "Please install it following instructions at: "
30
+ "https://github.com/google-deepmind/open_spiel"
31
+ ) from e
32
+
33
+
34
class OpenSpielEnvironment(Environment):
    """
    OpenSpiel Environment wrapper for OpenEnv.

    This environment wraps OpenSpiel games and provides a single-agent interface.
    For multi-player games, the agent controls one player while opponent(s) use
    a fixed policy (e.g., random).

    Supported games:
    - Single-player: catch, cliff_walking, 2048, blackjack
    - Multi-player: tic_tac_toe, kuhn_poker

    Args:
        game_name: Name of the OpenSpiel game (e.g., "catch", "tic_tac_toe").
        agent_player: Which player ID the agent controls (default 0).
        opponent_policy: Policy for opponent players ("random", "first", etc.).
        game_params: Optional game-specific parameters.

    Raises:
        ValueError: If the game cannot be created, if ``agent_player`` is not
            a valid player ID for the game, or (multi-player games only) if
            the opponent policy name is rejected by ``get_opponent_policy``.

    Example:
        >>> env = OpenSpielEnvironment("catch")
        >>> obs = env.reset()
        >>> print(obs.info_state)  # Agent's observation
        >>> obs = env.step(OpenSpielAction(action_id=1))
        >>> print(obs.reward)
    """

    def __init__(
        self,
        game_name: str = "catch",
        agent_player: int = 0,
        opponent_policy: str = "random",
        game_params: Dict[str, Any] | None = None,
    ):
        """Initialize OpenSpiel environment."""
        super().__init__()

        self.game_name = game_name
        self.agent_player = agent_player
        self.game_params = game_params or {}

        # Create OpenSpiel environment; any failure is reported as ValueError
        # with the offending game name.
        try:
            self._ospiel_env = rl_environment.Environment(
                game_name, **self.game_params
            )
        except Exception as e:
            raise ValueError(
                f"Failed to create OpenSpiel game '{game_name}': {e}"
            ) from e

        self.num_players = self._ospiel_env.num_players
        self.is_turn_based = self._ospiel_env.is_turn_based

        # Validate agent_player. Negative IDs are rejected as well: they would
        # index per-player lists from the end and never equal the reported
        # current player, so opponents would be auto-played on every turn.
        if agent_player < 0:
            raise ValueError(
                f"agent_player={agent_player} must be non-negative"
            )
        if agent_player >= self.num_players:
            raise ValueError(
                f"agent_player={agent_player} >= num_players={self.num_players}"
            )

        # Set up opponent policy for multi-player games
        self.opponent_policy_fn: OpponentPolicy | None = None
        if self.num_players > 1:
            self.opponent_policy_fn = get_opponent_policy(opponent_policy)

        # Initialize state
        self._state = OpenSpielState(
            game_name=game_name,
            agent_player=agent_player,
            opponent_policy=opponent_policy,
            game_params=self.game_params,
            num_players=self.num_players,
        )

        # Track last opponent action for learning
        self._last_opponent_action: int | None = None

        # Most recent TimeStep handed back to the agent. Simultaneous-move
        # games need it to look up the opponents' legal actions before the
        # joint action can be applied. None until reset() has been called.
        self._time_step: Any = None

    def reset(self) -> Observation:
        """
        Reset the environment and return initial observation.

        For multi-player games, this will auto-play opponent turns until
        it's the agent's turn (or terminal state).

        Returns:
            Initial observation for the agent.
        """
        # Reset OpenSpiel environment
        time_step = self._ospiel_env.reset()

        # Reset state tracking
        self._state.episode_id = str(uuid.uuid4())
        self._state.step_count = 0
        self._last_opponent_action = None

        # Auto-play opponent turns until agent's turn
        time_step = self._auto_play_opponents(time_step)
        self._time_step = time_step

        # Convert to OpenEnv observation
        return self._make_observation(time_step)

    def step(self, action: Action) -> Observation:
        """
        Execute agent's action and return resulting observation.

        For multi-player games, this will:
        1. Apply the agent's action
        2. Auto-play opponent turns until it's the agent's turn again
        3. Return the observation from the agent's perspective

        Args:
            action: OpenSpielAction containing the action_id to execute.

        Returns:
            Observation after action execution (and opponent turns if multi-player).

        Raises:
            ValueError: If action is not an OpenSpielAction.
            NotImplementedError: If the game is simultaneous-move and the
                agent is not player 0.
            RuntimeError: If a simultaneous-move game is stepped before
                reset() has been called.
        """
        if not isinstance(action, OpenSpielAction):
            raise ValueError(f"Expected OpenSpielAction, got {type(action)}")

        # Apply agent's action
        if self.is_turn_based:
            # Turn-based: single action
            time_step = self._ospiel_env.step([action.action_id])
        else:
            # Simultaneous-move: need actions for all players
            # For now, only support agent as player 0 in simultaneous games
            if self.agent_player != 0:
                raise NotImplementedError(
                    "Simultaneous-move games only support agent_player=0"
                )

            # Opponents choose from the legal actions of the position the
            # agent is acting in, i.e. the TimeStep produced by the previous
            # reset()/step() call.
            previous_step = self._time_step
            if previous_step is None:
                raise RuntimeError(
                    "reset() must be called before step() in simultaneous-move games"
                )

            # One action per player, ordered by player ID
            joint_actions = []
            for player_id in range(self.num_players):
                if player_id == self.agent_player:
                    joint_actions.append(action.action_id)
                else:
                    legal_actions = previous_step.observations["legal_actions"][player_id]
                    opp_action = self.opponent_policy_fn.select_action(
                        legal_actions, previous_step.observations
                    )
                    self._last_opponent_action = opp_action
                    joint_actions.append(opp_action)
            time_step = self._ospiel_env.step(joint_actions)

        self._state.step_count += 1

        # Auto-play opponent turns (for turn-based games)
        if self.is_turn_based:
            time_step = self._auto_play_opponents(time_step)

        self._time_step = time_step

        # Convert to OpenEnv observation
        return self._make_observation(time_step)

    @property
    def state(self) -> OpenSpielState:
        """Get current environment state."""
        return self._state

    def _auto_play_opponents(self, time_step) -> Any:
        """
        Auto-play opponent turns until it's the agent's turn or game is terminal.

        Every opponent move increments ``step_count`` and is remembered as the
        last opponent action.

        Args:
            time_step: Current TimeStep from OpenSpiel environment.

        Returns:
            Updated TimeStep after opponent moves.
        """
        # Single-player games: nothing to do
        if self.num_players == 1:
            return time_step

        # Multi-player games: play opponent turns
        while (
            not time_step.last()
            and time_step.observations["current_player"] != self.agent_player
        ):
            current_player = time_step.observations["current_player"]
            legal_actions = time_step.observations["legal_actions"][current_player]

            # Select opponent action
            opp_action = self.opponent_policy_fn.select_action(
                legal_actions, time_step.observations
            )
            self._last_opponent_action = opp_action

            # Apply opponent action
            time_step = self._ospiel_env.step([opp_action])
            self._state.step_count += 1

        return time_step

    def _make_observation(self, time_step) -> OpenSpielObservation:
        """
        Convert OpenSpiel TimeStep to OpenEnv Observation.

        Args:
            time_step: OpenSpiel TimeStep object.

        Returns:
            OpenSpielObservation for the agent.
        """
        # Extract agent's information
        info_state = time_step.observations["info_state"][self.agent_player]
        legal_actions = time_step.observations["legal_actions"][self.agent_player]
        current_player_id = time_step.observations["current_player"]

        # Determine game phase
        if time_step.last():
            game_phase = "terminal"
        elif time_step.first():
            game_phase = "initial"
        else:
            game_phase = "playing"

        # Get reward for agent (None when the TimeStep carries no rewards)
        reward = None
        if time_step.rewards is not None:
            reward = float(time_step.rewards[self.agent_player])

        # Create observation; info_state is converted to a plain list so it
        # can be serialized (array-like objects expose tolist()).
        obs = OpenSpielObservation(
            info_state=info_state.tolist() if hasattr(info_state, "tolist") else list(info_state),
            legal_actions=legal_actions,
            game_phase=game_phase,
            current_player_id=current_player_id,
            opponent_last_action=self._last_opponent_action,
            done=time_step.last(),
            reward=reward,
        )

        return obs
src/envs/openspiel_env/server/opponent_policies.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Opponent policies for multi-player OpenSpiel games.
9
+
10
+ These policies are used to control non-agent players in multi-player games,
11
+ allowing single-agent RL training against fixed or adaptive opponents.
12
+ """
13
+
14
+ import random
15
+ from typing import Any, Protocol
16
+
17
+
18
class OpponentPolicy(Protocol):
    """Structural interface that every opponent policy has to satisfy."""

    def select_action(self, legal_actions: list[int], observations: dict[str, Any]) -> int:
        """
        Choose the opponent's next move.

        Args:
            legal_actions: Action IDs the opponent is allowed to play.
            observations: Current observations from the environment.

        Returns:
            The chosen action ID.
        """
        ...
33
+
34
+
35
class RandomOpponent:
    """Opponent that picks uniformly at random among the legal actions."""

    def select_action(self, legal_actions: list[int], observations: dict[str, Any]) -> int:
        """
        Return one legal action chosen with ``random.choice``.

        ``observations`` is accepted for interface compatibility and ignored.

        Raises:
            ValueError: If ``legal_actions`` is empty.
        """
        if legal_actions:
            return random.choice(legal_actions)
        raise ValueError("No legal actions available")
43
+
44
+
45
class FixedActionOpponent:
    """Deterministic opponent that always plays the same slot of the legal-action list."""

    def __init__(self, action_selector: str = "first"):
        """
        Create the opponent.

        Args:
            action_selector: Position to play: "first", "last" or "middle".
                Any other value behaves like "first".
        """
        self.action_selector = action_selector

    def select_action(self, legal_actions: list[int], observations: dict[str, Any]) -> int:
        """
        Return the legal action at the configured position.

        ``observations`` is accepted for interface compatibility and ignored.

        Raises:
            ValueError: If ``legal_actions`` is empty.
        """
        if not legal_actions:
            raise ValueError("No legal actions available")

        # Index into legal_actions per selector; "first" and unrecognized
        # selectors both resolve to index 0.
        position = {
            "last": -1,
            "middle": len(legal_actions) // 2,
        }.get(self.action_selector, 0)
        return legal_actions[position]
70
+
71
+
72
def get_opponent_policy(policy_name: str) -> OpponentPolicy:
    """
    Look up an opponent policy by its name.

    Args:
        policy_name: One of "random", "first", "last" or "middle".

    Returns:
        A RandomOpponent for "random"; otherwise a FixedActionOpponent
        configured with the given selector.

    Raises:
        ValueError: If policy_name is not recognized.
    """
    if policy_name == "random":
        return RandomOpponent()

    fixed_selectors = ("first", "last", "middle")
    if policy_name in fixed_selectors:
        return FixedActionOpponent(action_selector=policy_name)

    raise ValueError(f"Unknown opponent policy: {policy_name}")
src/envs/openspiel_env/test_docker_all_games.sh ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ # All rights reserved.
4
+ #
5
+ # This source code is licensed under the BSD-style license found in the
6
+ # LICENSE file in the root directory of this source tree.
7
+
8
+ # Automated test script for all OpenSpiel games in Docker
9
+ # Usage: ./test_docker_all_games.sh
10
+
11
# Stop at the first unguarded command failure.
set -e

# ANSI color escapes, kept as literal backslash sequences; they are expanded
# when printed with `echo -e` / `printf '%b'`.
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color (reset)

# Image, container name and host port used for every run, plus the health
# endpoint that is polled (up to MAX_WAIT one-second attempts) after startup.
IMAGE_NAME="openspiel-env:latest"
CONTAINER_NAME="openspiel-test"
PORT=8000
HEALTH_CHECK_URL="http://localhost:${PORT}/health"
MAX_WAIT=30

# Games to test
GAMES=(
  "catch"
  "tic_tac_toe"
  "kuhn_poker"
  "cliff_walking"
  "2048"
  "blackjack"
)

# Per-game outcomes ("game:STATUS" or "game:STATUS:message") and tallies.
declare -a RESULTS
PASSED=0
FAILED=0

# Banner
printf '%b\n' \
  "${BLUE}========================================${NC}" \
  "${BLUE}OpenSpiel Docker Integration Test${NC}" \
  "${BLUE}========================================${NC}" \
  ""
39
+
40
# Stop and remove the test container.
# Globals: CONTAINER_NAME, YELLOW, NC (read)
# Outputs: a progress line on stdout
# Returns: 0 always; docker errors are deliberately ignored because the
#          container may simply not exist yet (first run, or already removed).
cleanup() {
  echo -e "${YELLOW}Cleaning up containers...${NC}"
  # Quoted so the name is always passed to docker as exactly one argument.
  docker stop "${CONTAINER_NAME}" 2>/dev/null || true
  docker rm "${CONTAINER_NAME}" 2>/dev/null || true
}
46
+
47
# Poll the health endpoint once per second until it answers successfully.
# Arguments: $1 - game name; accepted because callers pass it, but not used
# Globals:   HEALTH_CHECK_URL, MAX_WAIT, GREEN, RED, NC (read)
# Outputs:   progress lines on stdout
# Returns:   0 as soon as one request succeeds, 1 after MAX_WAIT failed attempts
wait_for_health() {
  local attempt
  echo -e "  ⏳ Waiting for server to be ready..."

  for ((attempt = 1; attempt <= MAX_WAIT; attempt++)); do
    # -f makes curl exit non-zero on HTTP error statuses, -s keeps it quiet.
    if curl -s -f "${HEALTH_CHECK_URL}" > /dev/null 2>&1; then
      echo -e "  ${GREEN}✓${NC} Server ready (${attempt}s)"
      return 0
    fi
    sleep 1
  done

  echo -e "  ${RED}✗${NC} Server health check failed after ${MAX_WAIT}s"
  return 1
}
63
+
64
# Run the integration test for a single game: start the container with that
# game selected, wait for the server, run the example client and record the
# outcome.
# Arguments: $1 - game name, passed to the container as OPENSPIEL_GAME
# Globals:   IMAGE_NAME, CONTAINER_NAME, PORT and the color variables (read);
#            RESULTS is appended to and PASSED or FAILED is incremented
# Outputs:   progress on stdout; client output in /tmp/test_<game>.log
# Returns:   1 if the container or the server did not start; otherwise the
#            status of the final cleanup (client failures are recorded in
#            RESULTS/FAILED rather than returned)
test_game() {
  local game=$1
  local log_file="/tmp/test_${game}.log"
  local rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

  echo -e "\n${BLUE}${rule}${NC}"
  echo -e "${BLUE}Testing: ${game}${NC}"
  echo -e "${BLUE}${rule}${NC}"

  # Stop any existing container
  cleanup

  # Start container with game. The failure is handled here so that it is
  # recorded as a result instead of silently killing the script via `set -e`.
  echo -e "  🐳 Starting Docker container..."
  if ! docker run -d \
    --name "${CONTAINER_NAME}" \
    -p "${PORT}:8000" \
    -e "OPENSPIEL_GAME=${game}" \
    "${IMAGE_NAME}" > /dev/null; then
    echo -e "  ${RED}✗ FAILED${NC} - Container did not start"
    RESULTS+=("${game}:FAILED:Container did not start")
    FAILED=$((FAILED + 1))
    cleanup
    return 1
  fi

  # Wait for server to be ready
  if ! wait_for_health "${game}"; then
    echo -e "  ${RED}✗ FAILED${NC} - Server did not start"
    RESULTS+=("${game}:FAILED:Server did not start")
    FAILED=$((FAILED + 1))
    cleanup
    return 1
  fi

  # Run Python client test with proxies disabled so localhost is reached
  # directly.
  # NOTE(review): the client is not told which game is running; presumably it
  # works against whatever game the server exposes - confirm.
  echo -e "  🎮 Running Python client test..."
  if NO_PROXY=localhost,127.0.0.1 HTTP_PROXY= HTTPS_PROXY= \
    PYTHONPATH="${PWD}/src:${PYTHONPATH:-}" \
    python3 examples/openspiel_simple.py > "${log_file}" 2>&1; then

    # A zero exit status is not enough: the log must show a finished episode.
    if grep -q "Episode finished!" "${log_file}"; then
      echo -e "  ${GREEN}✓ PASSED${NC} - Episode completed successfully"
      RESULTS+=("${game}:PASSED")
      PASSED=$((PASSED + 1))
    else
      echo -e "  ${RED}✗ FAILED${NC} - Episode did not complete"
      RESULTS+=("${game}:FAILED:Episode incomplete")
      FAILED=$((FAILED + 1))
    fi
  else
    echo -e "  ${RED}✗ FAILED${NC} - Python client error"
    RESULTS+=("${game}:FAILED:Client error")
    FAILED=$((FAILED + 1))
  fi

  # Cleanup
  cleanup
}
116
+
117
# Run tests for all games.
# test_game returns non-zero when the server never becomes healthy; under
# `set -e` an unguarded call would terminate the script right here and the
# summary below would never be printed. The outcome is already recorded in
# RESULTS/FAILED, so the return status is intentionally discarded.
for game in "${GAMES[@]}"; do
  test_game "${game}" || true
done
121
+
122
# Print summary: one line per recorded result followed by the totals.
echo -e "\n${BLUE}========================================${NC}"
echo -e "${BLUE}Test Summary${NC}"
echo -e "${BLUE}========================================${NC}"
echo ""

# Each entry is "game:STATUS" or "game:STATUS:message"; `read` assigns the
# remainder of the line to the last variable, so the message may itself
# contain colons.
for result in "${RESULTS[@]}"; do
  IFS=':' read -r game status message <<< "${result}"
  if [[ "${status}" == "PASSED" ]]; then
    echo -e "  ${GREEN}✓${NC} ${game}"
  else
    echo -e "  ${RED}✗${NC} ${game} - ${message}"
  fi
done

echo ""
echo -e "Total: ${PASSED} passed, ${FAILED} failed out of ${#GAMES[@]} games"
echo ""
140
+
141
# Exit with appropriate code: 0 when no game failed, 1 otherwise.
# Arithmetic evaluation avoids the word-splitting pitfalls of an unquoted
# variable inside `[ ... ]`.
if (( FAILED == 0 )); then
  echo -e "${GREEN}========================================${NC}"
  echo -e "${GREEN}All tests PASSED! 🎉${NC}"
  echo -e "${GREEN}========================================${NC}"
  exit 0
else
  echo -e "${RED}========================================${NC}"
  echo -e "${RED}Some tests FAILED${NC}"
  echo -e "${RED}========================================${NC}"
  exit 1
fi