FROM python:3.11-slim

# Install required system dependencies
RUN apt-get update && apt-get install -y \
    git curl build-essential cmake \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Create writable directories
RUN mkdir -p /app/.cache /app/vector_database && chmod -R 777 /app

# Set environment variables
ENV TRANSFORMERS_CACHE=/app/.cache \
    HF_HOME=/app/.cache \
    CHROMADB_DISABLE_TELEMETRY=true

# Install dependencies from requirements.txt first
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Download the NLTK punkt tokenizer data once during build
# (nltk itself is installed via requirements.txt above)
RUN python -m nltk.downloader punkt punkt_tab

# ✅ STEP 1: Copy the source data and the Python script into the image
COPY ./combined_context.jsonl .
COPY ./create_granular_chunks.py .

# ✅ STEP 2: Run the script to generate the chunks file inside the image
RUN python create_granular_chunks.py

# ✅ STEP 3: The 'granular_chunks_improved.jsonl' file now exists inside the
# image, so we no longer need to copy it from the local machine.

# Note: As recommended before, 'llama-cpp-python' should be removed from
# requirements.txt so we rely on the more stable, version-pinned installation below.
RUN pip install --no-cache-dir llama-cpp-python==0.2.61

# Copy the rest of the application code
COPY ./app ./app

# Download the fine-tuned TinyLlama GGUF model
RUN curl -fL -o /app/tinyllama_dop_q4_k_m.gguf \
    https://huggingface.co/Kalpokoch/FinetunedQuantizedTinyLama/resolve/main/tinyllama_dop_q4_k_m.gguf \
    && echo "✅ TinyLlama model downloaded."

# Expose the application port
EXPOSE 7860

# Run the FastAPI application
CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "7860"]
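
# Example usage (a sketch; the image tag 'dop-rag-app' is an arbitrary
# placeholder, not a name defined anywhere in this repo):
#   docker build -t dop-rag-app .
#   docker run -p 7860:7860 dop-rag-app
# The API should then be reachable at http://localhost:7860 (e.g. the
# interactive docs at /docs), assuming app/app.py exposes a standard
# FastAPI instance named 'app' as the CMD above expects.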