# LLM configuration: the primary provider, per-role model settings, and
# routing settings.
LLM_CONFIG = {
    "primary_provider": "huggingface",
    # One entry per role. Three of the four roles point at the same
    # "Qwen/Qwen2.5-7B-Instruct" model id; only the embedding role differs.
    "models": {
        "reasoning_primary": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",
            "task": "general_reasoning",
            # NOTE(review): 10000 may exceed the model's generation limit --
            # confirm against the model card.
            "max_tokens": 10000,
            "temperature": 0.7,
            "cost_per_token": 0.000015,
            # NOTE(review): this entry is flagged is_chat_model=True, while
            # "gpt2" is presumably used as a plain completion model -- verify
            # the fallback path handles the difference.
            "fallback": "gpt2",
            "is_chat_model": True,
        },
        "embedding_specialist": {
            "model_id": "sentence-transformers/all-MiniLM-L6-v2",
            "task": "embeddings",
            "vector_dimensions": 384,
            "purpose": "semantic_similarity",
            "cost_advantage": "90%_cheaper_than_primary",
            "is_chat_model": False,
        },
        "classification_specialist": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",
            "task": "intent_classification",
            "max_length": 512,
            "specialization": "fast_inference",
            "latency_target": "<100ms",
            "is_chat_model": True,
        },
        "safety_checker": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",
            "task": "content_moderation",
            "confidence_threshold": 0.85,
            "purpose": "bias_detection",
            "is_chat_model": True,
        },
    },
    "routing_logic": {
        "strategy": "task_based_routing",
        # NOTE(review): these names do not match any key under "models";
        # presumably the router resolves them itself -- verify against the
        # consumer of this config.
        "fallback_chain": ["primary", "fallback", "degraded_mode"],
        "load_balancing": "round_robin_with_health_check",
    },
}