Instructions to use ryefoxlime/TADBot with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ryefoxlime/TADBot with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="ryefoxlime/TADBot")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("ryefoxlime/TADBot", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use ryefoxlime/TADBot with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "ryefoxlime/TADBot"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ryefoxlime/TADBot",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/ryefoxlime/TADBot
- SGLang
How to use ryefoxlime/TADBot with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "ryefoxlime/TADBot" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ryefoxlime/TADBot",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "ryefoxlime/TADBot" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ryefoxlime/TADBot",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use ryefoxlime/TADBot with Docker Model Runner:
docker model run hf.co/ryefoxlime/TADBot
from ollama import Client
import ollama
import chromadb
import speech_recognition as sr
import requests
import pyttsx3

# ChromaDB client (default settings) that stores the conversation embeddings.
client = chromadb.Client()

# Ollama client pointed at the server on localhost; used for chat completions.
llm = Client(host='http://localhost:11434')

# Seed exchanges as (id, prompt, response) rows; expanded into dicts below.
_SEED_EXCHANGES = (
    (
        1,
        'What is your name?',
        'My name is TADBot, a bot to help with short term remedial help for mental purposes. ',
    ),
    (
        2,
        'Bye',
        'Good to see you get better. Hopefully you reach out to me if you have any problems.',
    ),
    (
        3,
        'What is the essence of Life?',
        'The essence of life is to create what you want of yourself.',
    ),
)

# Each entry carries the 'id', 'prompt' and 'response' keys that
# create_vector_db() reads when it embeds the exchange.
message_history = [
    {'id': exchange_id, 'prompt': question, 'response': answer}
    for exchange_id, question, answer in _SEED_EXCHANGES
]

# Running chat transcript: role/content dicts sent to the model on every turn.
convo = []
def create_vector_db(conversations):
    """Rebuild the 'conversations' collection from the given exchanges.

    Any existing collection of that name is deleted first. Each exchange is
    then flattened into one string, embedded with the "nomic-embed-text"
    model through ollama, and added to the fresh collection under its id.

    Args:
        conversations: Iterable of dicts providing 'id', 'prompt' and
            'response' keys.
    """
    collection_name = 'conversations'

    try:
        client.delete_collection(collection_name)
    except ValueError:
        # Best-effort delete: a ValueError here must not block the rebuild.
        # NOTE(review): presumably raised when the collection does not exist
        # yet -- confirm against the installed chromadb version.
        pass

    collection = client.create_collection(name=collection_name)

    for entry in conversations:
        document = 'prompt: ' + entry["prompt"] + ' response: ' + entry["response"]
        embed_reply = ollama.embeddings(model="nomic-embed-text", prompt=document)
        collection.add(
            ids=[str(entry['id'])],
            embeddings=[embed_reply["embedding"]],
            documents=[document],
        )
def stream_response(prompt):
    """Send *prompt* to the "TADBot" model, then print and speak its reply.

    The user turn and the assistant turn are both appended to the shared
    ``convo`` history so that later calls carry the whole conversation.
    Despite the name, the reply is fetched in one piece: no streaming option
    is passed to ``llm.chat``.

    Args:
        prompt: The user's message text.

    Raises:
        Whatever ``llm.chat`` or the speech engine raises is propagated
        unchanged. If the chat call fails, ``convo`` is left exactly as it
        was before the call.
    """
    convo.append({'role': "user", 'content': prompt})
    try:
        output = llm.chat(model="TADBot", messages=convo)
        response = output['message']['content']
    except BaseException:
        # Roll back the unanswered user turn (it is the last element) so a
        # retry does not send two consecutive user messages. BaseException is
        # used so an interrupt during the request is rolled back too; the
        # error is always re-raised.
        convo.pop()
        raise

    # Record the reply before speaking it: once it has been produced it
    # belongs in the history even if text-to-speech fails afterwards.
    convo.append({'role': "assistant", 'content': response})

    print("TADBot: ")
    print(response)

    engine = pyttsx3.init('espeak')
    engine.say(response)
    engine.runAndWait()
def retrieve_embeddings(prompt):
    """Return the top stored conversation document for *prompt*.

    The prompt is embedded with the "nomic-embed-text" model through ollama
    and that embedding is used to query the 'conversations' collection for a
    single result.

    Args:
        prompt: Text to look up.

    Returns:
        The document string of the first result.
    """
    query_vector = ollama.embeddings(model="nomic-embed-text", prompt=prompt)['embedding']
    collection = client.get_collection(name='conversations')
    matches = collection.query(query_embeddings=[query_vector], n_results=1)
    # One query embedding and n_results=1: take the first document of the
    # first (only) result list.
    return matches['documents'][0][0]
# Seed the vector store once at start-up.
create_vector_db(message_history)

# Create the recognizer and microphone once. Rebuilding the recognizer on
# every turn would throw away the ambient-noise calibration performed at the
# end of the previous turn.
r = sr.Recognizer()
m = sr.Microphone()

# The interrupt handler wraps the whole loop so that Ctrl+C ends the session
# instead of merely restarting the current turn.
try:
    while True:
        # Reset every turn so a failed recognition can never reuse the
        # previous utterance (or hit an unbound name on the first turn).
        prompt = None

        print("Say something!")
        with m as source:
            audio = r.listen(source)

        try:
            # for testing purposes, we're just using the default API key
            # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
            # instead of `r.recognize_google(audio)`
            prompt = r.recognize_google(audio)
            print("Tadbot thinks you said: " + prompt)
        except sr.UnknownValueError:
            print("Tadbot could not understand audio")
        except sr.RequestError as e:
            print(f"Could not request results from Google Speech Recognition service; {e}")

        # Re-calibrate for background noise before the next listen.
        print("Please wait...")
        with m as source:
            r.adjust_for_ambient_noise(source)

        # Nothing usable was recognized: listen again rather than calling
        # the model with no input.
        if not prompt:
            continue

        # Accept "bye" regardless of letter case or surrounding whitespace.
        if prompt.strip().casefold() == "bye":
            print("TADBot: Hopefully I was able to help you out today. Have a Nice Day!")
            break

        # Embedding-based context lookup is currently disabled. To enable it:
        #     context = retrieve_embeddings(prompt)
        #     prompt = prompt + "CONTEXT FROM EMBEDDING: " + context
        stream_response(prompt)
except KeyboardInterrupt:
    # Ctrl+C: leave the loop quietly.
    pass