import base64
import io
import json
from typing import Any, Dict

from PIL import Image
from transformers import pipeline


class EndpointHandler:
    """
    Custom handler for the ZoeDepth depth-estimation model, implementing the
    `EndpointHandler` interface expected by Hugging Face Inference Endpoints.
    The final result is serialized into a single JSON string.
    """

    def __init__(self, path: str = ""):
        # `path` points at the model repository on the endpoint; the
        # pipeline loads the ZoeDepth weights from there.
        self.pipe = pipeline(task="depth-estimation", model=path)
        print("Depth estimation pipeline initialized successfully.")

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        """
        This method is called for every API request.

        Args:
            data (Dict[str, Any]): The request payload. The value under the
                "inputs" key (or the dict itself, if that key is absent) is
                either a PIL image or raw image bytes.

        Returns:
            Dict[str, str]: A dictionary with a single key "generated_text",
                containing a JSON string of the results.
        """
        # The payload may carry the image under "inputs" or be the image
        # itself; fall back to the whole object if the key is absent.
        inputs = data.pop("inputs", data)

        # Accept either an already-decoded PIL image or raw image bytes.
        if isinstance(inputs, Image.Image):
            image = inputs
        else:
            image = Image.open(io.BytesIO(inputs))

        # The depth-estimation pipeline returns the raw depth tensor under
        # "predicted_depth" and a rendered depth map image under "depth".
        prediction = self.pipe(image)

        # Move the tensor to the CPU and convert it to nested Python lists
        # so it can be JSON-serialized.
        raw_depth_tensor = prediction["predicted_depth"]
        raw_depth_data = raw_depth_tensor.cpu().tolist()

        # Encode the rendered depth map as a base64 PNG data URL.
        visual_map_image = prediction["depth"]
        buffered = io.BytesIO()
        visual_map_image.save(buffered, format="PNG")
        visual_map_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

        results = {
            "raw_depth_data": raw_depth_data,
            "visual_depth_map": f"data:image/png;base64,{visual_map_base64}",
        }

        # Serialize everything into a single JSON string so the endpoint
        # responds with one "generated_text" field.
        json_output_string = json.dumps(results)

        return {"generated_text": json_output_string}