forked from anomalyco/opencode
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest-databricks-models.sh
More file actions
executable file
·195 lines (169 loc) · 6.08 KB
/
test-databricks-models.sh
File metadata and controls
executable file
·195 lines (169 loc) · 6.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#!/bin/bash
# Test script for Databricks models via the opencode HTTP API.
# For every model it checks a basic text response and a tool-call round trip.
# Auto-starts/restarts the dev server as needed.
#
# Optional environment overrides (defaults keep the previous behaviour):
#   OPENCODE_TEST_BASE_URL  - server URL to test against
#                             (default: http://localhost:4096)
#   OPENCODE_TEST_PROVIDER  - provider ID sent with every prompt
#                             (default: databricks)

BASE_URL="${OPENCODE_TEST_BASE_URL:-http://localhost:4096}"
PROVIDER="${OPENCODE_TEST_PROVIDER:-databricks}"
# Absolute directory of this script; the dev server is launched from here.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Representative models from each family
MODELS=(
  "databricks-claude-sonnet-4-6"
  "databricks-gpt-5-3-codex"
  "databricks-gpt-5-2-codex"
  "databricks-gpt-5"
  "databricks-gpt-5-mini"
  "databricks-gemini-2-5-flash"
  "databricks-gemini-2-5-pro"
  "databricks-llama-4-maverick"
  "databricks-meta-llama-3-3-70b-instruct"
  "databricks-qwen3-next-80b-a3b-instruct"
)

# Result counters and message lists, updated while the models are tested.
PASSED=0
FAILED=0
WARNINGS=0
ERRORS=()
WARN_LIST=()
# Auto-manage the dev server
#######################################
# (Re)start the dev server in the background and wait until it answers.
# Globals:
#   SCRIPT_DIR (read)  - directory the server is launched from
#   BASE_URL   (read)  - URL polled for readiness
#   SERVER_PID (write) - PID of the background job that launched the server
#                        (may be a wrapper shell rather than bun itself)
# Outputs:
#   Progress on stdout; a log-file hint on stderr when startup times out.
# Returns:
#   0 once "$BASE_URL/session" returns valid JSON, 1 after 30 failed polls.
#######################################
start_server() {
  local attempt

  echo -n " Starting dev server... "
  # Kill any process whose command line matches; errors are ignored because
  # "nothing to kill" is the normal case on a fresh start.
  pkill -f "bun.*serve" 2>/dev/null
  sleep 1
  cd "$SCRIPT_DIR" && bun dev serve > /tmp/opencode-test-server.log 2>&1 &
  SERVER_PID=$!

  # Poll for readiness, up to 30 attempts. Each probe is time-boxed so a
  # connection that is accepted but never answered cannot stall the loop.
  for ((attempt = 1; attempt <= 30; attempt++)); do
    if curl -s --max-time 2 "$BASE_URL/session" | jq -e . > /dev/null 2>&1; then
      echo "OK (pid: $SERVER_PID)"
      return 0
    fi
    sleep 1
  done

  echo "FAIL (timeout)"
  echo " See /tmp/opencode-test-server.log for server output" >&2
  return 1
}
# Stop the dev server by signalling every process whose full command line
# matches the bun "serve" pattern. pkill's diagnostics are discarded and its
# exit status becomes the function's status.
stop_server() {
  local -r cmdline_pattern='bun.*serve'
  pkill -f "$cmdline_pattern" 2> /dev/null
}
#######################################
# Run the basic-response and tool-call checks against one model and record
# the outcome in the global counters.
# Globals:
#   BASE_URL, PROVIDER, SCRIPT_DIR (read)
#   PASSED, FAILED, WARNINGS, ERRORS, WARN_LIST (updated)
# Arguments:
#   $1 - model ID to test
# Outputs:
#   Progress lines on stdout.
# Returns:
#   0 when the model passed (with or without a warning), 1 when any step
#   failed.
#######################################
test_model() {
  local model="$1"
  local session_resp session_id
  local msg_resp api_error api_error_msg text_content input_tokens output_tokens
  local config_path tool_resp tool_api_error tool_api_error_msg tool_text
  local history tool_count tool_names

  echo ""
  echo "============================================"
  echo "Testing: $model"
  echo "============================================"

  # 1. Create session
  echo -n " Creating session... "
  session_resp=$(curl -s -X POST "$BASE_URL/session" \
    -H "Content-Type: application/json" \
    -d '{"title":"test-'"$model"'"}')
  session_id=$(printf '%s\n' "$session_resp" | jq -r '.id // empty')
  if [ -z "$session_id" ]; then
    echo "FAIL"
    FAILED=$((FAILED + 1))
    ERRORS+=("$model: session creation failed")
    return 1
  fi
  echo "$session_id"

  # 2. Test basic response
  echo -n " Basic response... "
  msg_resp=$(curl -s --max-time 120 -X POST "$BASE_URL/session/$session_id/message" \
    -H "Content-Type: application/json" \
    -d '{
      "model": {"providerID": "'"$PROVIDER"'", "modelID": "'"$model"'"},
      "parts": [{"type": "text", "text": "What is 2+2? Answer with just the number."}]
    }')

  # Check for API-level errors
  api_error=$(printf '%s\n' "$msg_resp" | jq -r '.info.error.name // empty' 2>/dev/null)
  api_error_msg=$(printf '%s\n' "$msg_resp" | jq -r '.info.error.data.message // empty' 2>/dev/null | head -c 200)
  if [ -n "$api_error" ]; then
    echo "FAIL ($api_error: ${api_error_msg:0:100})"
    FAILED=$((FAILED + 1))
    ERRORS+=("$model: $api_error - ${api_error_msg:0:150}")
    return 1
  fi

  # Check for text parts
  text_content=$(printf '%s\n' "$msg_resp" | jq -r '[.parts[] | select(.type == "text") | .text] | join(" ")' 2>/dev/null)
  input_tokens=$(printf '%s\n' "$msg_resp" | jq '.info.tokens.input // 0' 2>/dev/null)
  output_tokens=$(printf '%s\n' "$msg_resp" | jq '.info.tokens.output // 0' 2>/dev/null)
  if [ -z "$text_content" ] || [ "$text_content" = "null" ]; then
    echo "FAIL (no text in response)"
    FAILED=$((FAILED + 1))
    ERRORS+=("$model: no text in basic response")
    return 1
  fi
  echo "OK (\"${text_content:0:50}\") [${input_tokens}/${output_tokens} tokens]"

  # 3. Test tool call - ask it to use the read tool.
  # The file is located relative to this script instead of a developer-specific
  # absolute path, so the prompt is valid on any checkout.
  config_path="${SCRIPT_DIR:-$PWD}/opencode.json"
  # Escape backslashes and double quotes so the path can be embedded in the
  # JSON string literal below.
  config_path=${config_path//\\/\\\\}
  config_path=${config_path//\"/\\\"}

  echo -n " Tool call... "
  tool_resp=$(curl -s --max-time 300 -X POST "$BASE_URL/session/$session_id/message" \
    -H "Content-Type: application/json" \
    -d '{
      "model": {"providerID": "'"$PROVIDER"'", "modelID": "'"$model"'"},
      "parts": [{"type": "text", "text": "Use the read tool to read the file at '"$config_path"' and tell me the provider name configured in it."}]
    }')

  # Check for API-level errors
  tool_api_error=$(printf '%s\n' "$tool_resp" | jq -r '.info.error.name // empty' 2>/dev/null)
  tool_api_error_msg=$(printf '%s\n' "$tool_resp" | jq -r '.info.error.data.message // empty' 2>/dev/null | head -c 200)
  if [ -n "$tool_api_error" ]; then
    echo "FAIL ($tool_api_error: ${tool_api_error_msg:0:100})"
    FAILED=$((FAILED + 1))
    ERRORS+=("$model: tool test - $tool_api_error")
    return 1
  fi
  tool_text=$(printf '%s\n' "$tool_resp" | jq -r '[.parts[] | select(.type == "text") | .text] | join(" ")' 2>/dev/null | head -c 150)

  # The prompt endpoint returns only the final assistant message.
  # Check full message history for tool calls in intermediate steps.
  history=$(curl -s "$BASE_URL/session/$session_id/message" 2>/dev/null)
  tool_count=$(printf '%s\n' "$history" | jq '[.[] | .parts[] | select(.type == "tool")] | length' 2>/dev/null)
  # jq prints nothing when the history is empty or not valid JSON; treat that
  # as "no tool calls" instead of feeding an empty string to a numeric test.
  [[ "$tool_count" =~ ^[0-9]+$ ]] || tool_count=0

  if [ "$tool_count" -gt 0 ]; then
    tool_names=$(printf '%s\n' "$history" | jq -r '[.[] | .parts[] | select(.type == "tool") | .tool] | unique | join(", ")' 2>/dev/null)
    echo "OK (tools: $tool_names) text: \"${tool_text:0:80}\""
    PASSED=$((PASSED + 1))
    return 0
  fi

  # No tool call and no text is a plain failure, not a warning.
  if [ -z "$tool_text" ]; then
    echo "FAIL (no tool call and no text)"
    FAILED=$((FAILED + 1))
    ERRORS+=("$model: no tool call and no text in tool test")
    return 1
  fi

  # The model answered but skipped the tool: pass with a warning.
  echo "WARN (no tool call, but text: \"${tool_text:0:80}\")"
  WARNINGS=$((WARNINGS + 1))
  WARN_LIST+=("$model: responded without using tools")
  PASSED=$((PASSED + 1))
  return 0
}
# Banner
echo "====================================================="
echo " Databricks Model Integration Test Suite"
echo "====================================================="
echo " Server: $BASE_URL"
echo " Models: ${#MODELS[@]}"
echo "====================================================="

# Fail fast when a required tool is missing. Without this, a missing jq or
# curl makes the readiness probe below fail, which would restart the dev
# server and end in a misleading "timeout".
for required_cmd in curl jq; do
  if ! command -v "$required_cmd" > /dev/null 2>&1; then
    echo " ERROR: required command not found: $required_cmd" >&2
    exit 1
  fi
done

# Auto-start server: reuse one that already answers with valid JSON,
# otherwise launch it. The probe is time-boxed so it cannot hang.
if curl -s --max-time 5 "$BASE_URL/session" | jq -e . > /dev/null 2>&1; then
  echo " Server already running"
else
  start_server || exit 1
fi

# Run every model; each test records its own result in the global counters,
# so a failing model does not stop the run.
for model in "${MODELS[@]}"; do
  test_model "$model"
done
# Summary of the run: totals first, then the individual failure and warning
# messages collected while testing.
echo ""
echo "====================================================="
echo " RESULTS"
echo "====================================================="
echo " Passed: $PASSED / ${#MODELS[@]}"
echo " Failed: $FAILED / ${#MODELS[@]}"
echo " Warnings: $WARNINGS"

if (( ${#ERRORS[@]} > 0 )); then
  echo ""
  echo " Failures:"
  for err in "${ERRORS[@]}"; do
    echo " - $err"
  done
fi

if (( ${#WARN_LIST[@]} > 0 )); then
  echo ""
  echo " Warnings:"
  for w in "${WARN_LIST[@]}"; do
    echo " - $w"
  done
fi
echo "====================================================="

# Exit with the number of failures. Exit statuses are truncated to 8 bits, so
# clamp at 255; otherwise 256 failures would be reported as success (0).
exit "$(( FAILED > 255 ? 255 : FAILED ))"