FROM python:3.11-slim

# Install required system dependencies
RUN apt-get update && apt-get install -y \
    git curl build-essential cmake \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Create writable directories
RUN mkdir -p /app/.cache /app/vector_database && chmod -R 777 /app

# Set environment variables
ENV TRANSFORMERS_CACHE=/app/.cache \
    HF_HOME=/app/.cache \
    CHROMADB_DISABLE_TELEMETRY=true

# Install dependencies from requirements.txt first
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Download the NLTK punkt tokenizer data once during build
# (nltk itself is installed via requirements.txt above)
RUN python -m nltk.downloader punkt punkt_tab

# ✅ STEP 1: Copy the source data and the Python script into the image
COPY ./combined_context.jsonl .
COPY ./create_granular_chunks.py .

# ✅ STEP 2: Run the script to generate the chunks file inside the image
RUN python create_granular_chunks.py

# ✅ STEP 3: The 'granular_chunks_improved.jsonl' file now exists inside the
# image, so we no longer need to copy it from the local machine.

# Note: As recommended before, 'llama-cpp-python' should be removed from
# requirements.txt so we rely on the more stable, version-pinned installation below.
RUN pip install --no-cache-dir llama-cpp-python==0.2.61

# Copy the rest of the application code
COPY ./app ./app

# Download the fine-tuned TinyLlama GGUF model
RUN curl -fL -o /app/tinyllama_dop_q4_k_m.gguf \
    https://huggingface.co/Kalpokoch/FinetunedQuantizedTinyLama/resolve/main/tinyllama_dop_q4_k_m.gguf \
    && echo "✅ TinyLlama model downloaded."

# Expose the application port
EXPOSE 7860

# Run the FastAPI application
CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "7860"]
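
# Example usage (a sketch; the image tag 'dop-rag-app' is an arbitrary
# placeholder, not a name defined anywhere in this repo):
#   docker build -t dop-rag-app .
#   docker run -p 7860:7860 dop-rag-app
# The API should then be reachable at http://localhost:7860 (e.g. the
# interactive docs at /docs), assuming app/app.py exposes a standard
# FastAPI instance named 'app' as the CMD above expects.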