llama.cpp-config-scripts/env.example at main · akarazhev/llama.cpp-config-scripts · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# llama.cpp Server Configuration
# Copy this file to .env and customize as needed.
# Usage: source .env && ./scripts/start_llama_server.sh
# NOTE(review): the assignments below are not exported, so `source .env` alone
# sets them only in the current shell; a child process does not inherit them.
# Unless start_llama_server.sh loads .env itself (confirm), use:
#   set -a && source .env && set +a && ./scripts/start_llama_server.sh

# MODEL CONFIGURATION
# Quantization variant; same value as the tag after ':' in the LLAMA_MODEL example below.
MODEL_QUANTIZATION=UD-Q4_K_XL
# Uncomment to set LLAMA_MODEL explicitly (format shown: <repository>:<quantization>).
# LLAMA_MODEL=unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF:UD-Q4_K_XL

# SERVER CONFIGURATION
# 127.0.0.1 is the loopback address; presumably the server's bind address, in
# which case it is reachable only from this machine — TODO confirm in the start script.
LLAMA_HOST=127.0.0.1
LLAMA_PORT=8080

# MODEL PARAMETERS
# NOTE(review): these names mirror llama-server options (--ctx-size,
# --n-gpu-layers, --threads, --batch-size, --parallel); presumably the start
# script forwards them unchanged — confirm in scripts/start_llama_server.sh.
LLAMA_CTX_SIZE=16384
LLAMA_GPU_LAYERS=99
LLAMA_THREADS=-1
LLAMA_BATCH_SIZE=512
LLAMA_PARALLEL=4
LLAMA_LOG_LEVEL=info

# DEVSTRAL 2 PRODUCTION SETTINGS (per Unsloth documentation)
# Temperature: Recommended 0.15 for Devstral 2
LLAMA_TEMP=0.15
# Min P: Recommended 0.01 for Devstral 2 (default is 0.1)
LLAMA_MIN_P=0.01
# Priority: Production priority setting
# NOTE(review): if this is forwarded to llama.cpp's --prio option, 3 selects
# realtime scheduling priority — confirm that is intended for this host.
LLAMA_PRIO=3
# Model Alias: Used for API model identification
LLAMA_ALIAS=unsloth/Devstral-Small-2-24B-Instruct-2512