Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion autobot-backend/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ backend:
# gpu_endpoint: http://172.16.168.20:11434
# gpu_models:
# - "qwen3.5:9b"
- # - "deepseek-r1:14b"
+ # - "mistral:7b-instruct"
# - "codellama:13b"

# Infrastructure host overrides
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,7 @@ def test_cache_hit_skips_summary_llm_call(self, mock_get_redis):
)

cached_payload = json.dumps(
- {"summary": "cached summary", "model": "llama3.2:3b"}
+ {"summary": "cached summary", "model": "llama3.2:1b"}
)
mock_get_redis.return_value = self._mock_redis(cached=cached_payload)
cog = ContextGeneratorCognifier()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def setup_method(self):

with patch("src.orchestrator.global_config_manager") as mock_config:
mock_config.get_llm_config.return_value = {
- "orchestrator_llm": "llama3.2:3b"
+ "orchestrator_llm": "llama3.2:1b"
}
self.orchestrator = Orchestrator()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def _get_config_updates(self) -> dict:
}],
"src/config.py": [
{
- "find": f'"orchestrator": os.getenv("AUTOBOT_ORCHESTRATOR_MODEL", "llama3.2:3b")',
+ "find": f'"orchestrator": os.getenv("AUTOBOT_ORCHESTRATOR_MODEL", "llama3.2:1b")',
"replace": f'"orchestrator": os.getenv("AUTOBOT_ORCHESTRATOR_MODEL", "{_ROUTING_MODEL}")',
"line_context": "models configuration — orchestrator",
},
Expand All @@ -259,7 +259,7 @@ def _get_config_updates(self) -> dict:
},
],
"backend/utils/connection_utils.py": [{
- "find": '"deepseek-r1:14b"',
+ "find": '"qwen3.5:9b"',
"replace": f'"{_DEFAULT_MODEL}"',
"line_context": "AUTOBOT_DEFAULT_LLM_MODEL default",
}],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def generate_browser_fix_script(self):
use_phi2: false,
api_endpoint: 'http://localhost:8001',
ollama_endpoint: 'http://localhost:11434',
- ollama_model: 'deepseek-r1:14b',
+ ollama_model: 'qwen3.5:9b',
streaming: false
},
ui: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ async def get_available_models(self) -> List[str]:
"""Get list of available models for NPU."""
return [
"llama3.2:1b-instruct-q4_K_M",
- "llama3.2:3b-instruct-q4_K_M",
+ "llama3.2:1b-instruct-q4_K_M",
"nomic-embed-text",
"text-classification-model",
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ def get_components(self) -> Dict[str, Any]:
],
"models": [
"artifish/llama3.2-uncensored:latest (2.2GB) - GPU",
- "deepseek-r1:14b (8.4GB) - GPU for complex reasoning",
- "llama3.2:3b-instruct-q4_K_M (2GB) - GPU backup",
+ "qwen3.5:9b (5.2GB) - GPU for complex reasoning",
+ "mistral:7b-instruct (4.1GB) - GPU backup",
],
},
"windows_npu_worker": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,18 +192,18 @@ def create_model_recommendations():
"orchestrator": "artifish/llama3.2-uncensored:latest", # 2.2GB
"rag": "artifish/llama3.2-uncensored:latest", # 2.2GB
"research": "artifish/llama3.2-uncensored:latest", # 2.2GB
- "chat": "llama3.2:3b-instruct-q4_K_M", # 2GB
+ "chat": "mistral:7b-instruct", # 4.1GB
"analysis": "artifish/llama3.2-uncensored:latest", # 2.2GB
"planning": "artifish/llama3.2-uncensored:latest", # 2.2GB
}
parallel_capacity = "2-3 concurrent models"
elif total_memory >= 6000: # 6GB GPU
recommended_models = {
- "orchestrator": "llama3.2:3b-instruct-q4_K_M", # 2GB
+ "orchestrator": "llama3.2:1b-instruct-q4_K_M", # 807MB
"rag": "artifish/llama3.2-uncensored:latest", # 2.2GB
- "research": "llama3.2:3b-instruct-q4_K_M", # 2GB
+ "research": "mistral:7b-instruct", # 4.1GB
"chat": "llama3.2:1b-instruct-q4_K_M", # 807MB
- "analysis": "llama3.2:3b-instruct-q4_K_M", # 2GB
+ "analysis": "mistral:7b-instruct", # 4.1GB
}
parallel_capacity = "2 concurrent models"
else: # 4GB GPU
Expand Down
4 changes: 2 additions & 2 deletions docs/frontend/settings-panel-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -378,8 +378,8 @@ const llmConfig = {
providers: {
ollama: {
endpoint: 'http://localhost:11434/api/generate',
- models: ['deepseek-r1:14b', 'dolphin-llama3:8b'],
- selected_model: 'deepseek-r1:14b'
+ models: ['qwen3.5:9b', 'dolphin-llama3:8b'],
+ selected_model: 'qwen3.5:9b'
}
}
},
Expand Down
2 changes: 1 addition & 1 deletion docs/guides/LLM_Interface_Migration_Guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ llm:
ollama:
enabled: true
base_url: "http://localhost:11434"
- default_model: "deepseek-r1:14b"
+ default_model: "qwen3.5:9b"
openai:
enabled: false
api_key: ""
Expand Down
4 changes: 2 additions & 2 deletions docs/guides/chat-ollama-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ backend:
# gpu_endpoint: http://172.16.168.20:11434
# gpu_models:
# - "qwen3.5:9b"
- # - "deepseek-r1:14b"
+ # - "mistral:7b-instruct"
# - "codellama:13b"

# Infrastructure host overrides
Expand Down Expand Up @@ -1018,7 +1018,7 @@ backend:
gpu_endpoint: http://172.16.168.20:11434 # GPU-accelerated endpoint
gpu_models:
- "qwen3.5:9b"
- - "deepseek-r1:14b"
+ - "mistral:7b-instruct"
- "codellama:13b"
```

Expand Down
2 changes: 1 addition & 1 deletion docs/guides/llm-middleware-telemetry.md
Original file line number Diff line number Diff line change
Expand Up @@ -1054,7 +1054,7 @@ backend:
# gpu_endpoint: http://172.16.168.20:11434
# gpu_models:
# - "qwen3.5:9b"
- # - "deepseek-r1:14b"
+ # - "mistral:7b-instruct"

# Fallback path for _get_ollama_endpoint_fallback() via get_host("ollama")
infrastructure:
Expand Down
2 changes: 1 addition & 1 deletion docs/migration/LLM_Interface_Migration_Guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ llm:
ollama:
enabled: true
base_url: "http://localhost:11434"
- default_model: "deepseek-r1:14b"
+ default_model: "qwen3.5:9b"
openai:
enabled: false
api_key: ""
Expand Down
16 changes: 8 additions & 8 deletions docs/plans/2026-02-02-knowledge-graph-enhancement-design.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,15 @@ stages:
cognify:
tasks:
- name: extract_entities
- model: llama3.2:3b
+ model: llama3.2:1b
batch_size: 50
confidence_threshold: 0.7
- name: extract_relationships
- model: llama3.2:3b
+ model: llama3.2:1b
batch_size: 50
- name: extract_events
enabled: true
- model: llama3.2:3b
+ model: llama3.2:1b
- name: generate_summaries
enabled: true
levels: [chunk, section, document]
Expand Down Expand Up @@ -451,7 +451,7 @@ class EntityExtractor(BaseCognifier):
"""LLM-powered entity extraction from text chunks."""

def __init__(self, config: dict):
- self.model = config.get('model', 'llama3.2:3b')
+ self.model = config.get('model', 'llama3.2:1b')
self.confidence_threshold = config.get('confidence_threshold', 0.7)
self.llm = LLMService(model=self.model)

Expand Down Expand Up @@ -565,7 +565,7 @@ class RelationshipExtractor(BaseCognifier):
"""LLM-powered relationship extraction."""

def __init__(self, config: dict):
- self.model = config.get('model', 'llama3.2:3b')
+ self.model = config.get('model', 'llama3.2:1b')
self.llm = LLMService(model=self.model)

async def process(
Expand Down Expand Up @@ -786,7 +786,7 @@ class EventExtractor(BaseCognifier):
"""LLM-powered event and temporal extraction."""

def __init__(self, config: dict):
- self.model = config.get('model', 'llama3.2:3b')
+ self.model = config.get('model', 'llama3.2:1b')
self.llm = LLMService(model=self.model)

async def process(
Expand Down Expand Up @@ -1036,7 +1036,7 @@ class HierarchicalSummarizer(BaseCognifier):
"""Generate multi-level summaries of documents."""

def __init__(self, config: dict):
- self.model = config.get('model', 'llama3.2:3b')
+ self.model = config.get('model', 'llama3.2:1b')
self.levels = config.get('levels', ['chunk', 'section', 'document'])
self.max_words = config.get('max_words', {
'chunk': 50,
Expand Down Expand Up @@ -1449,7 +1449,7 @@ autobot-frontend/src/components/knowledge/
# Add to config/ssot_config.py
knowledge_graph:
pipeline:
- default_model: "llama3.2:3b"
+ default_model: "llama3.2:1b"
batch_size: 50
confidence_threshold: 0.7
temporal:
Expand Down
2 changes: 1 addition & 1 deletion docs/plans/2026-02-02-phase3-client-library-design.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ export async function discoverService(
| Local Tier | Default Model | SLM Provider |
|------------|---------------|--------------|
| TIER_1 | llama3.2:1b | ollama |
- | TIER_2 | llama3.2:3b | ollama |
+ | TIER_2 | llama3.2:1b | ollama |
| TIER_3 | qwen3.5:9b | ollama |
| TIER_4 | qwen3.5:9b | ollama |

Expand Down
Loading