# zoedepth/handler.py
import base64
import io
import json
from typing import Any, Dict

from PIL import Image
from transformers import pipeline


class EndpointHandler:
    """
    Custom handler for the ZoeDepth model, implementing the Hugging Face
    Inference Endpoints custom-handler interface (an `EndpointHandler`
    class exposing `__init__` and `__call__`).

    The full result (raw depth values plus a base64-encoded visual depth
    map) is serialized into a single JSON string.
    """

    def __init__(self, path: str = ""):
        # Initialize the depth-estimation pipeline from the model weights
        # that Inference Endpoints mounts at `path`.
        self.pipe = pipeline(task="depth-estimation", model=path)
        print("Depth estimation pipeline initialized successfully.")

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        """
        Called once per API request.

        Args:
            data (Dict): The request payload. `data["inputs"]` may be a PIL
                image (binary content types are pre-decoded by the serving
                toolkit), raw bytes, or a base64-encoded string (JSON payloads).

        Returns:
            Dict[str, str]: A dictionary with a single key, "generated_text",
            whose value is a JSON string of the results.
        """
        # Get the image from the request.
        inputs = data.pop("inputs", data)

        # Accept a PIL image, raw bytes, or a base64-encoded string.
        if isinstance(inputs, Image.Image):
            image = inputs
        elif isinstance(inputs, str):
            # JSON payloads carry binary image data as base64 text.
            image = Image.open(io.BytesIO(base64.b64decode(inputs)))
        else:
            image = Image.open(io.BytesIO(inputs))

        # Run depth estimation on the image.
        prediction = self.pipe(image)

        # `predicted_depth` is a torch tensor of per-pixel depth values;
        # move it to the CPU and convert it to nested Python lists so it
        # can be JSON-serialized.
        raw_depth_tensor = prediction["predicted_depth"]
        raw_depth_data = raw_depth_tensor.cpu().tolist()

        # `depth` is a PIL image visualizing the depth map; encode it as
        # a base64 PNG so it can travel inside the JSON response.
        visual_map_image = prediction["depth"]
        buffered = io.BytesIO()
        visual_map_image.save(buffered, format="PNG")
        visual_map_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

        # Bundle both outputs and serialize them into one JSON string,
        # matching the single-key response format described in the class
        # docstring.
        results = {
            "raw_depth_data": raw_depth_data,
            "visual_depth_map": f"data:image/png;base64,{visual_map_base64}",
        }
        json_output_string = json.dumps(results)

        return {"generated_text": json_output_string}
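

# --- Usage sketch (not part of the handler) --------------------------------
# A minimal local smoke test. The hub id "Intel/zoedepth-nyu-kitti" is an
# assumption standing in for the real model path the endpoint would mount;
# the endpoint URL and token in the commented request example below are
# placeholders, not real values.
if __name__ == "__main__":
    handler = EndpointHandler(path="Intel/zoedepth-nyu-kitti")  # assumed model id

    # Build a throwaway test image instead of reading one from disk.
    test_image = Image.new("RGB", (256, 256), color="gray")
    response = handler({"inputs": test_image})

    results = json.loads(response["generated_text"])
    print("raw depth entries:", len(results["raw_depth_data"]))
    print("visual map prefix:", results["visual_depth_map"][:40])

    # Against a deployed endpoint, the same payload could be sent roughly
    # like this (hypothetical URL and token):
    #
    #   import requests
    #   with open("example.jpg", "rb") as f:
    #       r = requests.post(
    #           "https://<endpoint-url>",
    #           headers={"Authorization": "Bearer <token>",
    #                    "Content-Type": "image/jpeg"},
    #           data=f.read(),
    #       )
    #   results = json.loads(r.json()["generated_text"])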