Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion server-components/engine_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ async def load_seed_from_base64(self, base64_data: str) -> torch.Tensor:
lambda: self._load_seed_from_base64_sync(base64_data)
)

async def load_engine(self, model_uri: str, quant: str | None = None):
async def load_engine(self, model_uri: str, quant: str | None = None, cpu_offload: bool = False):
"""Initialize or switch the WorldEngine model.

model_uri is required — the server does not have a default model.
Expand Down Expand Up @@ -400,6 +400,7 @@ def _create_engine():
device=DEVICE,
quant=requested_quant,
dtype=dtype,
cpu_offload=cpu_offload,
)

new_engine = await self._run_on_cuda_thread(_create_engine)
Expand Down
5 changes: 3 additions & 2 deletions server-components/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ async def websocket_endpoint(websocket: WebSocket):

Client -> Server:
{"type": "control", "buttons": [str], "mouse_dx": float, "mouse_dy": float, "ts": float}
{"type": "init", "req_id": "...", "model": str, "seed_image_data": str, "seed_filename": str, "scene_edit": bool, "action_logging": bool, "quant": str|null}
{"type": "init", "req_id": "...", "model": str, "seed_image_data": str, "seed_filename": str, "scene_edit": bool, "action_logging": bool, "quant": str|null, "cpu_offload": bool}
{"type": "reset"}
{"type": "pause"}
{"type": "resume"}
Expand Down Expand Up @@ -798,6 +798,7 @@ async def handle_init(msg: dict, is_game_loop: bool = False) -> tuple[bool, bool
seed_data = msg.get("seed_image_data")
seed_filename = msg.get("seed_filename")
quant = msg.get("quant")
cpu_offload = msg.get("cpu_offload", False)

# Update flags
if "scene_edit" in msg:
Expand Down Expand Up @@ -832,7 +833,7 @@ async def handle_init(msg: dict, is_game_loop: bool = False) -> tuple[bool, bool
if model_uri and (model_uri != getattr(world_engine, "model_uri", None) or quant_changed):
logger.info(f"[{client_host}] {'Live model switch' if is_game_loop else 'Requested model'}: {model_uri} (quant={quant})")
world_engine.set_progress_callback(progress_callback, asyncio.get_running_loop())
await world_engine.load_engine(model_uri, quant=quant)
await world_engine.load_engine(model_uri, quant=quant, cpu_offload=cpu_offload)
world_engine.set_progress_callback(None)
world_engine.seed_frame = None
session.perceptual_frame_count = 0
Expand Down
15 changes: 14 additions & 1 deletion src/components/MenuSettingsView.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ const MenuSettingsView = ({ onBack, wide }: MenuSettingsViewProps) => {
const [showCredits, setShowCredits] = useState(false)

const [menuQuant, setMenuQuant] = useState<QuantOption>(settings.engine_quant ?? 'none')
const [menuCpuOffload, setMenuCpuOffload] = useState(() => settings.cpu_offload ?? false)
const [menuCapInferenceFps, setMenuCapInferenceFps] = useState(() => settings.cap_inference_fps ?? true)
const [menuKeybindings, setMenuKeybindings] = useState<Keybindings>(() => ({ ...settings.keybindings }))
const [menuSceneEditEnabled, setMenuSceneEditEnabled] = useState(
Expand Down Expand Up @@ -229,6 +230,7 @@ const MenuSettingsView = ({ onBack, wide }: MenuSettingsViewProps) => {
setMenuMouseSensitivity(streamingToMenu(settings.mouse_sensitivity ?? mouseSensitivity))
setMenuServerUrl(configServerUrl)
setMenuQuant(settings.engine_quant ?? 'none')
setMenuCpuOffload(settings.cpu_offload ?? false)
setMenuKeybindings({ ...settings.keybindings })
setMenuSceneEditEnabled(settings.experimental?.scene_edit_enabled ?? false)
setMenuPerformanceStats(settings.debug_overlays.performance_stats)
Expand Down Expand Up @@ -358,6 +360,7 @@ const MenuSettingsView = ({ onBack, wide }: MenuSettingsViewProps) => {
engine_mode: engineModeValue,
engine_model: menuWorldModel,
engine_quant: menuQuant,
cpu_offload: menuCpuOffload,
cap_inference_fps: menuCapInferenceFps,
mouse_sensitivity: streamingValue,
keybindings: menuKeybindings,
Expand All @@ -382,6 +385,7 @@ const MenuSettingsView = ({ onBack, wide }: MenuSettingsViewProps) => {
menuServerUrl,
menuWorldModel,
menuQuant,
menuCpuOffload,
menuCapInferenceFps,
menuKeybindings,
menuSceneEditEnabled,
Expand All @@ -397,13 +401,14 @@ const MenuSettingsView = ({ onBack, wide }: MenuSettingsViewProps) => {
const hasEngineModeChanged = menuEngineMode !== (configEngineMode === ENGINE_MODES.SERVER ? 'server' : 'standalone')
const hasWorldModelChanged = menuWorldModel !== configWorldModel
const hasQuantChanged = menuQuant !== (settings.engine_quant ?? 'none')
const hasCpuOffloadChanged = menuCpuOffload !== (settings.cpu_offload ?? false)

const handleBackClick = useCallback(async () => {
if (menuEngineMode === 'server' && (!menuServerUrl.trim() || serverUrlStatus !== 'valid')) {
setShowServerErrorModal(true)
return
}
if (isStreaming && (hasEngineModeChanged || hasWorldModelChanged || hasQuantChanged)) {
if (isStreaming && (hasEngineModeChanged || hasWorldModelChanged || hasQuantChanged || hasCpuOffloadChanged)) {
setShowModeSwitchModal(true)
return
}
Expand All @@ -416,6 +421,8 @@ const MenuSettingsView = ({ onBack, wide }: MenuSettingsViewProps) => {
isStreaming,
hasEngineModeChanged,
hasWorldModelChanged,
hasQuantChanged,
hasCpuOffloadChanged,
applyDraftSettings,
onBack
])
Expand Down Expand Up @@ -586,6 +593,12 @@ const MenuSettingsView = ({ onBack, wide }: MenuSettingsViewProps) => {
onChange={(v) => setMenuQuant(v as QuantOption)}
/>
</SettingsRow>
<SettingsCheckbox
label="app.settings.performance.cpuOffload"
description="app.settings.performance.cpuOffloadDescription"
checked={menuCpuOffload}
onChange={setMenuCpuOffload}
/>
<SettingsCheckbox
label="app.settings.performance.capInferenceFps"
description="app.settings.performance.capInferenceFpsDescription"
Expand Down
11 changes: 8 additions & 3 deletions src/context/StreamingContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,10 @@ export const StreamingProvider = ({ children }: { children: ReactNode }) => {
// Set lastAppliedModel before await to prevent the lifecycle machine from
// seeing a model mismatch during the re-render triggered by setInitMetrics.
const quant = settings.engine_quant ?? 'none'
const cpuOffload = settings.cpu_offload ?? false
lastAppliedModelRef.current = settings.experimental?.scene_edit_enabled
? `${selectedModel}+scene_edit+${quant}`
: `${selectedModel}+${quant}`
? `${selectedModel}+scene_edit+${quant}+cpu${cpuOffload ? '1' : '0'}`
: `${selectedModel}+${quant}+cpu${cpuOffload ? '1' : '0'}`

const metrics = await sendInit({
model: selectedModel,
Expand All @@ -257,6 +258,7 @@ export const StreamingProvider = ({ children }: { children: ReactNode }) => {
scene_edit: settings.experimental?.scene_edit_enabled ?? false,
action_logging: settings.debug_overlays?.action_logging ?? false,
quant: quant !== 'none' ? quant : null,
cpu_offload: cpuOffload,
cap_inference_fps: settings.cap_inference_fps ?? true
})
setInitMetrics(metrics)
Expand All @@ -269,6 +271,7 @@ export const StreamingProvider = ({ children }: { children: ReactNode }) => {
isConnected,
settings?.engine_model,
settings?.engine_quant,
settings?.cpu_offload,
settings?.cap_inference_fps,
settings.experimental?.scene_edit_enabled,
settings.debug_overlays?.action_logging,
Expand Down Expand Up @@ -360,7 +363,8 @@ export const StreamingProvider = ({ children }: { children: ReactNode }) => {
isPaused,
sceneEditActive: sceneEditGrace,
sceneEditEnabled: settings.experimental?.scene_edit_enabled,
engineQuant: settings.engine_quant
engineQuant: settings.engine_quant,
cpuOffload: settings.cpu_offload
})
})
}, [
Expand All @@ -369,6 +373,7 @@ export const StreamingProvider = ({ children }: { children: ReactNode }) => {
error,
settings?.engine_model,
settings?.engine_quant,
settings?.cpu_offload,
settings.experimental?.scene_edit_enabled,
engineError,
hasReceivedFrame,
Expand Down
8 changes: 5 additions & 3 deletions src/context/streamingLifecyclePayload.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,19 @@ type BuildStreamingLifecycleSyncPayloadArgs = {
sceneEditActive: boolean
sceneEditEnabled?: boolean
engineQuant?: string
cpuOffload?: boolean
}

export const buildStreamingLifecycleSyncPayload = (
args: BuildStreamingLifecycleSyncPayloadArgs
): StreamingLifecycleSyncPayload => {
// Encode scene_edit_enabled and quant into the model key so toggling
// either triggers the same intentional-reconnect flow as switching models.
// Encode scene_edit_enabled, quant, and cpu_offload into the model key so
// toggling any of them triggers the same intentional-reconnect flow as
// switching models.
const baseModel = args.engineModel || DEFAULT_WORLD_ENGINE_MODEL
const quant = args.engineQuant ?? 'none'
let selectedModel = args.sceneEditEnabled ? `${baseModel}+scene_edit` : baseModel
selectedModel = `${selectedModel}+${quant}`
selectedModel = `${selectedModel}+${quant}+cpu${args.cpuOffload ? '1' : '0'}`

return {
portalState: args.portalState,
Expand Down
3 changes: 3 additions & 0 deletions src/i18n/en.ts
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,9 @@ const en = {
quantization: 'Quantization',
quantizationDescription:
'Reduces model precision for faster inference and lower memory usage, at the cost of some visual quality.\nFirst use of INT8 quantization can take 1-2 hours while inference kernels are optimized - this is a one-time cost.',
cpuOffload: 'CPU Model Loading',
cpuOffloadDescription:
'Builds the model on CPU before moving it to GPU. Essential for systems with low VRAM.',
capInferenceFps: 'Cap Inference FPS',
capInferenceFpsDescription:
"Limits the generation rate to the model's trained framerate. Without this, the game may run faster than intended."
Expand Down
3 changes: 3 additions & 0 deletions src/i18n/goose.ts
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,9 @@ const goose = {
quantization: 'Feather compression',
quantizationDescription:
'Plucks a few feathers for faster waddling with less nest space, at the cost of some plumage quality.\nFirst INT8 plucking takes 1-2 hours while the goose optimizes its molt - this is a one-time cost.',
cpuOffload: 'Nest on land first',
cpuOffloadDescription:
'Builds the goose on land before sending it to the pond. Essential for ponds with limited space.',
capInferenceFps: 'Cap honk rate',
capInferenceFpsDescription:
"Limits the waddling rate to the flock's trained pace. Turning this off may result in the goose waddling faster than intended."
Expand Down
2 changes: 2 additions & 0 deletions src/i18n/ja.ts
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ const ja = {
quantization: '量子化',
quantizationDescription:
'モデルの精度を下げて推論速度を向上させ、メモリ使用量を削減します。画質がわずかに低下します。\nINT8量子化の初回使用時は、推論カーネルの最適化に1-2時間かかる場合がありますが、これは一度だけのコストです。',
cpuOffload: 'CPUモデル読み込み',
cpuOffloadDescription: 'モデルをCPU上で構築してからGPUに転送します。VRAM不足のシステムでは必須です。',
capInferenceFps: '推論FPSを制限',
capInferenceFpsDescription:
'モデルの学習フレームレートに合わせて生成速度を制限します。オフにすると、ゲーム速度が意図より速くなる場合があります。'
Expand Down
2 changes: 2 additions & 0 deletions src/i18n/zh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ const zh = {
quantization: '量化',
quantizationDescription:
'降低模型精度以加快推理速度并减少显存占用,但会略微降低画质。\n首次使用INT8量化时,推理内核优化可能需要1-2小时,但这是一次性的。',
cpuOffload: 'CPU模型加载',
cpuOffloadDescription: '在CPU上构建模型后再移至GPU。对于显存不足的系统必不可少。',
capInferenceFps: '限制推理帧率',
capInferenceFpsDescription: '将生成速率限制为模型的训练帧率。关闭此选项可能导致游戏速度快于预期。'
},
Expand Down
1 change: 1 addition & 0 deletions src/types/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ export const settingsSchema = z.object({
engine_mode: z.enum(['standalone', 'server']).default('standalone'),
engine_model: z.string().default(DEFAULT_WORLD_ENGINE_MODEL),
engine_quant: z.enum(QUANT_OPTIONS).default('none'),
cpu_offload: z.boolean().default(false),
cap_inference_fps: z.boolean().default(true),
custom_models: z.array(z.string()).default([]),
mouse_sensitivity: z.number().min(0.1).max(3.0).default(1.8),
Expand Down
1 change: 1 addition & 0 deletions src/types/ws.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export type InitMessage = {
scene_edit?: boolean
action_logging?: boolean
quant?: string | null
cpu_offload?: boolean
cap_inference_fps?: boolean
}
export type InitResponse = {
Expand Down