-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathenv.example
More file actions
29 lines (25 loc) · 819 Bytes
/
env.example
File metadata and controls
29 lines (25 loc) · 819 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# llama.cpp Server Configuration
# Copy this file to .env and customize as needed
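# Usage: set -a && source .env && set +a && ./scripts/start_llama_server.sh
# (The assignments in this file have no `export`, so a plain `source .env` only sets
# shell variables; `set -a` exports them so the start script, run as a child process, can read them.)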
# MODEL CONFIGURATION
# Quantization tag of the GGUF build; it matches the suffix after the colon
# in the LLAMA_MODEL example below.
MODEL_QUANTIZATION=UD-Q4_K_XL
# Example of a full model reference in <repo>:<quantization> form, left commented out.
# LLAMA_MODEL=unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF:UD-Q4_K_XL
# SERVER CONFIGURATION
# 127.0.0.1 is the loopback address. Presumably this is the server's bind address,
# which would limit access to the local machine; verify against the start script.
LLAMA_HOST=127.0.0.1
# TCP port (presumably the server's listen port; verify against the start script).
LLAMA_PORT=8080
# MODEL PARAMETERS
# NOTE(review): the names below suggest llama.cpp server options (context size,
# GPU layer offload, CPU threads, batch size, parallel slots, log verbosity).
# Confirm the exact mapping in scripts/start_llama_server.sh.
LLAMA_CTX_SIZE=16384
# NOTE(review): 99 presumably exceeds the model's layer count so every layer is offloaded; confirm.
LLAMA_GPU_LAYERS=99
# NOTE(review): -1 presumably means "let the server pick the thread count"; confirm.
LLAMA_THREADS=-1
LLAMA_BATCH_SIZE=512
LLAMA_PARALLEL=4
LLAMA_LOG_LEVEL=info
# DEVSTRAL 2 PRODUCTION SETTINGS (per Unsloth documentation)
# Temperature: Recommended 0.15 for Devstral 2
LLAMA_TEMP=0.15
# Min P: Recommended 0.01 for Devstral 2 (default is 0.1)
LLAMA_MIN_P=0.01
# Priority: Production priority setting
# NOTE(review): if this is passed to llama.cpp's --prio option, 3 selects the highest
# (realtime) scheduling priority, which may need elevated privileges; confirm.
LLAMA_PRIO=3
# Model Alias: Used for API model identification
LLAMA_ALIAS=unsloth/Devstral-Small-2-24B-Instruct-2512