diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 3ecd8f1f4..205e08c6a 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -19,6 +19,7 @@ {"id":"bd-2gc9","title":"Nested tight lists incorrectly marked as loose (Para instead of Plain)","description":"In process_list() at crates/pampa/src/pandoc/treesitter.rs:183-192, multi-block list items with at least one paragraph are unconditionally marked as loose. This is wrong: a list item containing [Paragraph, BulletList] (e.g., '* foo\\n * bar') has no blank line between blocks and should remain tight. The CommonMark spec says a list is loose only when items are separated by blank lines or contain two block elements WITH A BLANK LINE between them.","status":"closed","priority":1,"issue_type":"bug","created_at":"2026-02-13T22:49:14.652871Z","created_by":"cscheid","updated_at":"2026-02-13T23:05:58.650439Z","closed_at":"2026-02-13T23:05:58.650417Z","close_reason":"Fixed: detect blank lines via tree-sitter block_continuation spans in process_list_item, propagate through IntermediateListItem","source_repo":".","compaction_level":0,"original_size":0} {"id":"bd-2gkx","title":"Kanban: New card creation button with form","description":"Add a 'New Card' button that opens a form with: title (required), type dropdown (feature/milestone/bug/task), optional deadline with date picker, optional status. Default creation date to today. Extend addCard() in astHelpers.ts to support deadline and status. 
Plan: claude-notes/plans/2026-02-11-kanban-ui-enhancements.md","status":"closed","priority":2,"issue_type":"feature","created_at":"2026-02-11T18:32:46.896753Z","created_by":"cscheid","updated_at":"2026-02-11T18:39:33.119846Z","closed_at":"2026-02-11T18:39:33.119829Z","close_reason":"Implemented - new card form with type, status, and deadline","source_repo":".","compaction_level":0,"original_size":0} {"id":"bd-2h6x","title":"Slide thumbnails show in outline pane for non-slide documents","description":"The useSlideThumbnails hook runs unconditionally for all documents, generating thumbnails even for regular HTML documents. PreviewRouter detects format: q2-slides but doesn't communicate the format back to Editor.tsx. Fix: add onFormatChange callback from PreviewRouter so Editor can conditionally generate thumbnails. Plan: claude-notes/plans/2026-02-26-slide-thumbnails-conditional.md","status":"closed","priority":2,"issue_type":"bug","created_at":"2026-02-26T15:04:39.650792Z","created_by":"cscheid","updated_at":"2026-02-26T15:10:26.512270Z","closed_at":"2026-02-26T15:10:26.512246Z","close_reason":"Fixed: slide thumbnails now conditional on format: q2-slides","source_repo":".","compaction_level":0,"original_size":0} +{"id":"bd-2mxo","title":"Metadata materialization drops source_info provenance","description":"MergedConfig::materialize() in quarto-config/src/materialize.rs loses source provenance that pampa's YAML parser correctly provides:\n- Map container source_info replaced with heuristic/default (lines ~121-161)\n- Map entry key_source always set to SourceInfo::default() (line ~132)\n- Array container source_info uses only last item (lines ~109-113)\n\nScalar values are preserved correctly. Affects error reporting for any metadata-related diagnostics after the merge stage. The code has an explicit comment acknowledging the key_source loss: 'We lose key source info during materialization.'\n\nPre-existing since commit 955bc326 (2025-12-07, 'config merging in'). 
Not blocking Plan 0 but degrades the YAML frontmatter Concat piece in the QMD writer's SourceInfo output.","status":"open","priority":2,"issue_type":"bug","created_at":"2026-04-20T15:52:16.644685Z","created_by":"gordon","updated_at":"2026-04-20T15:52:16.644685Z","source_repo":".","compaction_level":0,"original_size":0} {"id":"bd-2olu","title":"Hub MCP Server: automerge project access for AI agents","description":"Design and implement an MCP server that allows AI coding agents (Claude Code, Codex, etc.) to read and write files in Quarto Hub projects via automerge sync, without requiring filesystem access. Plan: claude-notes/plans/2026-03-13-hub-mcp-server-design.md","status":"open","priority":1,"issue_type":"feature","created_at":"2026-03-13T23:38:33.161278Z","created_by":"cscheid","updated_at":"2026-03-13T23:38:33.161278Z","source_repo":".","compaction_level":0,"original_size":0} {"id":"bd-2rbk","title":"Improve pampa test skip behavior when Pandoc is absent","description":"4 pampa tests hard-assert instead of gracefully skipping when Pandoc is not available. Should use test skip or conditional compilation so cargo xtask test runs cleanly without Pandoc. Low priority since pampa is currently excluded from Windows test compilation due to v8.","status":"open","priority":3,"issue_type":"task","created_at":"2026-03-20T13:36:13.451656600Z","created_by":"cderv","updated_at":"2026-03-20T13:36:13.451656600Z","source_repo":".","compaction_level":0,"original_size":0} {"id":"bd-2s6j","title":"Kanban: Horizontal rows instead of columns","description":"Change the board layout from vertical status columns to horizontal rows. Each status group becomes a full-width row with cards flowing left-to-right. 
Plan: claude-notes/plans/2026-02-11-kanban-ui-enhancements.md","status":"closed","priority":2,"issue_type":"task","created_at":"2026-02-11T18:32:33.985420Z","created_by":"cscheid","updated_at":"2026-02-11T18:34:29.136267Z","closed_at":"2026-02-11T18:34:29.136249Z","close_reason":"Implemented - board now uses horizontal rows","source_repo":".","compaction_level":0,"original_size":0} diff --git a/CLAUDE.md b/CLAUDE.md index 0f71e61b7..751a2d2ce 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -30,7 +30,7 @@ Always follow TDD workflow: write/update tests BEFORE implementing features. Whe 1. Stage and commit changes as needed 2. **Verify the full workspace compiles cleanly** (`cargo build --workspace`) 3. **Verify the full workspace tests pass** (`cargo nextest run --workspace`) -4. **For changes to quarto-core or quarto-pandoc-types**: Run `cargo xtask verify` to ensure hub-client/WASM builds work +4. **Run `cargo xtask verify`** — at minimum `cargo xtask verify --skip-hub-build` for Rust-only changes; full `cargo xtask verify` when the WASM leg could be affected (any change under `quarto-core`, `quarto-pandoc-types`, or anything else hub-client depends on). This is the step that matches CI's `-D warnings` strictness; plain `cargo build` / `cargo nextest` from steps 2 and 3 do not. 5. Ask the user for permission before pushing 6. Only push after receiving explicit approval diff --git a/claude-notes/plans/2026-03-16-extensions-grand-plan.md b/claude-notes/plans/2026-03-16-extensions-grand-plan.md index 7cd7f4ecd..f02868b83 100644 --- a/claude-notes/plans/2026-03-16-extensions-grand-plan.md +++ b/claude-notes/plans/2026-03-16-extensions-grand-plan.md @@ -447,14 +447,18 @@ shortcode processing pipeline. Includes block-level shortcode support and a new **Goal**: Extensions can provide custom execution engines. 
-- [ ] Parse `contributes.engines` from extensions -- [ ] Register engines in the engine execution stage -- [ ] Support external engine paths -- [ ] Tests - -**Open questions**: -- What engines does q2 currently support? -- How is the engine selection mechanism implemented? +**Status**: Superseded by the TypeScript Engine Extensions grand plan: +`claude-notes/plans/2026-04-16-ts-engine-extensions-subprocess.md` + +That plan covers engine extension parsing (adding `engines` to `Contributes`), +subprocess-based execution via Deno, engine discovery/claiming, and registration. +Plan 1 Phase 1D within it handles the `_extension.yml` parsing and `EngineRegistry` +integration that was originally scoped here. + +**Answered open questions**: +- q2 currently supports markdown, knitr, and jupyter engines (all built-in) +- Engine selection uses metadata-based detection (`detect_engine()` in `detection.rs`); + the TS engine plan adds a 4-phase claiming algorithm (file ext → YAML → language scan → fallback) ### Phase 9: Embedded Extensions diff --git a/claude-notes/plans/2026-04-16-julia-validation.md b/claude-notes/plans/2026-04-16-julia-validation.md new file mode 100644 index 000000000..f66fee2a1 --- /dev/null +++ b/claude-notes/plans/2026-04-16-julia-validation.md @@ -0,0 +1,203 @@ +# Plan 4: Julia Engine Validation + +**Grand plan:** [2026-04-16-ts-engine-extensions-subprocess.md](2026-04-16-ts-engine-extensions-subprocess.md) +**Depends on:** Plans 1, 2, and 3 (all must be substantially complete) +**Blocks:** Nothing (this is the final validation plan) +**Estimated sessions:** 1-2 + +## Overview + +End-to-end validation of the TypeScript engine extension system using the Julia engine from Quarto 1. Take the real `julia-engine.ts`, set it up as a q2 extension, and render documents with Julia code cells. + +This plan is primarily integration debugging. 
If Plans 1a, 1b, 1c, 2, and 3 are solid and the echo engine test from Plan 1c Phase 3 passes, most of the infrastructure works. This plan surfaces the gaps specific to a real-world engine extension. + +## Prerequisites + +- [ ] Plans 1a, 1b, and 1c complete: Rust subprocess infrastructure + Deno harness + extension integration, echo engine passes +- [ ] Plan 2 complete: `@quarto/api` package with text/markdown/format/path/system/console/crypto subpaths, all QuartoAPI namespaces except `jupyter` wired in +- [ ] Plan 3 complete: `@quarto/api/jupyter` with `toMarkdown` working and wired into engine-host +- [ ] Julia installed on the test machine (`julia` in PATH) + +## Work Items + +### Phase 4A: Set up Julia engine extension + +- [ ] Copy Julia engine from Quarto 1's **source/development version** (NOT the pkg-working version): + ``` + ~/src/quarto-cli/src/resources/extension-subtrees/julia-engine/ + ``` + Use this version because it resolves resource files via `import.meta.url` (relative to the JS file), rather than the distributed version which uses `quarto.path.resource()` pointing to Quarto's global `share/` directory. + +- [ ] Create test fixture with engine source AND its resource files: + ``` + tests/fixtures/extensions/julia-engine/ + _extension.yml + src/ + julia-engine.ts + constants.ts + Project.toml ← Julia environment definition + ensure_environment.jl ← Julia setup script + quartonotebookrunner.jl ← Julia execution entry point + start_quartonotebookrunner_detached.jl ← Daemon launcher + ``` + The .jl files and Project.toml live alongside the extension (same directory or parent) so that `dirname(import.meta.url)` resolves them. This matches Quarto 1's development-mode layout where the extension is self-contained. 
+- [ ] Write `_extension.yml`: + ```yaml + title: Julia Engine + author: Quarto + version: 1.0.0 + contributes: + engines: + - name: julia + path: src/julia-engine.ts + ``` +- [ ] Identify needed modifications to `julia-engine.ts`: + - Import paths: change `@quarto/types` imports if our type names differ + - API calls: verify all `quarto.*` calls match our implementation signatures + - Resource resolution: verify `import.meta.url`-based paths work after bundling (the bundled .js file's URL determines the base directory — resource files must be relative to where the bundle is loaded from) + - Deno APIs: verify `Deno.Command`, `Deno.connect`, `crypto.subtle`, file I/O all work (they should — it's running in real Deno) + - Standard library imports: `"path"`, `"fs/exists"`, `"encoding/base64"` — resolved at build time via the import map +- [ ] Document every modification in a compatibility log + +### Phase 4B: Minimal Julia render + +The simplest possible Julia document. + +- [ ] Create test document: + ```markdown + --- + engine: julia + --- + + ```{julia} + 1 + 1 + ``` + ``` +- [ ] Run through q2's render pipeline. Use `cargo run -- render <file>` (the `quarto` crate at `crates/quarto/` is the main CLI binary). Check existing smoke tests in `crates/quarto/tests/` for how integration tests invoke rendering programmatically. +- [ ] Debug the first failure. 
Common failure checklist: + - [ ] Extension not discovered → `_extension.yml` parsing issue + - [ ] Deno subprocess won't start → Deno not in PATH, or engine-host-deno bundle issue + - [ ] Engine module fails to load → import resolution, transpilation issue + - [ ] `engine.init()` fails → QuartoAPI construction issue + - [ ] `engine.launch()` fails → EngineProjectContext mismatch + - [ ] Julia process won't start → `Deno.Command` issue, Julia not in PATH + - [ ] Julia server connection fails → TCP connect issue, HMAC auth issue + - [ ] Execution succeeds but output is wrong → `toMarkdown()` issue + - [ ] Result deserialization fails → protocol/type mismatch +- [ ] Iterate until the simple document renders successfully +- [ ] Verify output HTML contains the result `2` + +### Phase 4C: Julia with figures + +- [ ] Create test document with a plot: + ```markdown + --- + engine: julia + --- + + ```{julia} + using Plots + plot(1:10, rand(10)) + ``` + ``` +- [ ] Verify: + - [ ] Figure file generated in `_files/` directory + - [ ] Figure referenced correctly in output markdown + - [ ] `supporting` files tracked in `ExecuteResult` + - [ ] HTML output renders with the figure + +### Phase 4D: Multiple cells and error handling + +- [ ] Test multiple code cells: + ```markdown + --- + engine: julia + --- + + ```{julia} + x = 42 + ``` + + ```{julia} + println("x is $x") + ``` + ``` +- [ ] Verify state persists between cells (x defined in first, used in second) + +- [ ] Test error handling: + ```markdown + --- + engine: julia + --- + + ```{julia} + error("this should fail gracefully") + ``` + ``` +- [ ] Verify error produces a useful message, not a crash + +### Phase 4E: Julia-specific features + +- [ ] Test daemon mode (`execute.daemon: true`) — Julia server stays alive +- [ ] Test `exeflags` option — arguments passed to Julia +- [ ] Test `env` option — environment variables set for Julia +- [ ] Test cell options: `echo: false`, `output: false`, `warning: false` + +### Phase 4F: 
Regression audit + +- [ ] Run same test documents through Quarto 1 for comparison +- [ ] Document output differences +- [ ] Verify all existing q2 tests pass (`cargo nextest run --workspace`) +- [ ] Run `cargo xtask verify` for full validation +- [ ] File issues (via `br create`) for any gaps discovered + +### Phase 4G: Adaptation documentation + +- [ ] Write a summary of all changes needed to `julia-engine.ts` +- [ ] Categorize changes: + - Import path adjustments + - API signature differences + - Missing QuartoAPI methods (if any were stubbed) + - Behavioral differences +- [ ] This becomes the basis for documentation for extension authors migrating from Quarto 1 + +## Design Notes + +### Debugging approach + +The subprocess architecture helps debugging — you can run the Deno engine-host independently: + +```bash +# Run engine-host manually for debugging +echo '{"type":"init","enginePath":"./julia-engine.ts","context":{...}}' | \ + deno run --allow-all ts-packages/quarto-engine-host-deno/src/host.ts +``` + +You can also add `console.error()` statements in the engine or harness and see them on stderr. + +### Standard library imports + +The Julia engine imports `"path"`, `"fs/exists"`, `"encoding/base64"` from Deno's standard library. Following Quarto 1's approach, these are resolved at **build time** via the import map (`"path"` → `jsr:@std/path`, etc.) and inlined into the bundled `.js` file. At runtime, no import resolution is needed. 
+ +The build step for the Julia engine fixture: +```bash +deno bundle --config=resources/extension-build/deno.json julia-engine.ts > julia-engine.js +``` + +### CI gating + +Julia engine tests should be: +- Gated behind a feature flag or test tag (Julia may not be installed in CI) +- Run manually during development +- Optionally run in CI if Julia is available + +## Success Criteria + +- [ ] Julia engine extension discovered and loaded by q2 +- [ ] Simple Julia code cell executes and produces correct output +- [ ] Figure generation works +- [ ] Multiple cells with shared state work +- [ ] Error handling produces useful messages +- [ ] All modifications to julia-engine.ts documented +- [ ] No regressions in existing tests +- [ ] `cargo xtask verify` passes diff --git a/claude-notes/plans/2026-04-16-plan1a-protocol-and-core.md b/claude-notes/plans/2026-04-16-plan1a-protocol-and-core.md new file mode 100644 index 000000000..bf84c0dd9 --- /dev/null +++ b/claude-notes/plans/2026-04-16-plan1a-protocol-and-core.md @@ -0,0 +1,925 @@ +# Plan 1a: Protocol & Rust Core Infrastructure + +**Grand plan:** [2026-04-16-ts-engine-extensions-subprocess.md](2026-04-16-ts-engine-extensions-subprocess.md) +**Depends on:** Plan 0 (SourceInfo in ExecutionContext, source_map serialization format) +**Blocks:** Plan 1b (Deno harness — needs the frozen JSON protocol schema from +Phase 1), Plan 1c (extension integration — needs `TsEngine` + trait extensions), +Plans 2 and 3 (via Plan 1b). +**Estimated sessions:** 1-2 + +## Overview + +Build the Rust-side infrastructure for TypeScript engine extensions: the +JSON protocol types, Deno subprocess management, `ExecutionEngine` trait +extensions, and the `TsEngine` struct. + +The Deno-side harness (`@quarto/engine-host-deno`) is Plan 1b — a separate +plan because once the JSON protocol schema is frozen (Phase 1 below), the +Rust-side work and the Deno-side harness are independent. 
+ +After this plan plus Plan 1b, you can spawn a Deno subprocess, send it +protocol messages covering the full `ExecutionEngineInstance` lifecycle, +and receive typed responses. Plan 1c wires this into the extension system +and detection pipeline. + +## Phase order + +Phase 1 → Phase 2 → Phase 3 → Phase 4 + +Phase 1 freezes the JSON protocol schema. Plan 1b can begin in parallel +with Phases 2-4 once Phase 1 is done. + +## Work Items + +### Phase 1: JSON protocol types + +Define the message types used between Rust and Deno. Both sides need matching definitions. + +- [ ] Create `crates/quarto-core/src/engine/ts_protocol.rs` with the protocol + messages. The protocol covers discovery, file conversion, execute, + post-execute, and query phases. + + **Design principle (from Plan 0 discussion):** q2 owns the rendering + pipeline — parsing, include expansion, AST serialization. The engine + owns file-format-specific knowledge (percent scripts, spin scripts) + and code execution. `markdownForFile` and `partitionedMarkdown` are + in the protocol because the engine knows how to read its own file + formats and may run filters (ipynb-filters) before partitioning. + `target()` is harness-internal — q2 constructs execution targets + from its AST natively. + + **Shared subprocess model:** One Deno process hosts all TS engine extensions. + Every message (except `Shutdown`) carries an `engine: String` field to route + to the correct engine. `Init` is called once per engine loaded into the + subprocess. This avoids spawning N Deno processes (one per engine) — important + because Julia is bundled, so anyone with an additional TS engine has at least + two. See the "Shared subprocess" design note below. + + ```rust + // Rust → Deno messages + // + // All variants except Shutdown carry `engine: String` for routing + // in the shared subprocess. The engine name comes from EngineMeta + // (returned in Ready after Init). 
 + #[derive(Serialize)] + #[serde(tag = "type")] + enum ToEngine { + // === Lifecycle === + // Init is called once per engine. Multiple engines coexist in + // one subprocess. The harness loads the module, calls init/launch, + // and tracks the instance keyed by engine name. + #[serde(rename = "init")] + Init { engine_path: String, context: EngineHostContext }, + #[serde(rename = "shutdown")] + Shutdown, // shuts down the entire subprocess (all engines) + + // === Discovery (ExecutionEngineDiscovery) === + #[serde(rename = "claimsLanguage")] + ClaimsLanguage { engine: String, language: String, first_class: Option<String> }, + #[serde(rename = "claimsFile")] + ClaimsFile { engine: String, file: String, ext: String }, + + // === File conversion === + #[serde(rename = "markdownForFile")] + MarkdownForFile { engine: String, file: String }, + + // === Optional instance methods === + #[serde(rename = "partitionedMarkdown")] + PartitionedMarkdown { engine: String, file: String, format: Option<TsFormatInfo> }, + // Note: target() is harness-internal, not a protocol message. + // The harness checks if the engine implements target(), calls it + // if so, and uses the result (including the opaque `data` cookie) + // to build the ExecutionTarget for execute(). All on the Deno side — + // q2 never sees target() results or the engine cookie. 
 + + // === Execute === + #[serde(rename = "execute")] + Execute { engine: String, options: TsExecuteOptions }, + + // === Post-execute === + #[serde(rename = "dependencies")] + Dependencies { engine: String, options: TsDependenciesOptions }, + #[serde(rename = "postprocess")] + Postprocess { engine: String, options: TsPostProcessOptions }, + #[serde(rename = "postRender")] + PostRender { engine: String, file: TsRenderResultFile }, + + // === Queries === + #[serde(rename = "canKeepSource")] + CanKeepSource { engine: String, target: TsExecutionTarget }, + #[serde(rename = "intermediateFiles")] + IntermediateFiles { engine: String, input: String }, + #[serde(rename = "filterFormat")] + FilterFormat { engine: String, source: String, options: TsRenderOptions, format: TsFormatInfo }, + #[serde(rename = "executeTargetSkipped")] + ExecuteTargetSkipped { engine: String, target: TsExecutionTarget, format: TsFormatInfo }, + } + + // Deno → Rust messages + #[derive(Deserialize)] + #[serde(tag = "type")] + enum FromEngine { + // === Lifecycle === + #[serde(rename = "ready")] + Ready { engine_meta: EngineMeta }, + #[serde(rename = "error")] + Error { message: String, stack: Option<String> }, + + // === Discovery (separate response types) === + #[serde(rename = "claimsLanguageResult")] + ClaimsLanguageResult { result: Option<String> }, + #[serde(rename = "claimsFileResult")] + ClaimsFileResult { result: bool }, + + // === File conversion === + #[serde(rename = "markdownForFileResult")] + MarkdownForFileResult { result: TsMappedStringWithMap }, + + // === Optional instance methods === + #[serde(rename = "partitionedMarkdownResult")] + PartitionedMarkdownResult { result: TsPartitionedMarkdown }, + + // === Execute === + #[serde(rename = "executeResult")] + ExecuteResult { result: TsExecuteResult }, + + // === Post-execute === + #[serde(rename = "dependenciesResult")] + DependenciesResult { result: TsDependenciesResult }, + #[serde(rename = "postprocessResult")] + PostprocessResult, // void return 
 + #[serde(rename = "postRenderResult")] + PostRenderResult, // void return + + // === Queries === + #[serde(rename = "canKeepSourceResult")] + CanKeepSourceResult { result: bool }, + #[serde(rename = "intermediateFilesResult")] + IntermediateFilesResult { result: Option<Vec<String>> }, + #[serde(rename = "filterFormatResult")] + FilterFormatResult { result: TsFormatInfo }, + #[serde(rename = "executeTargetSkippedResult")] + ExecuteTargetSkippedResult, // void return + } + ``` + + **Quarto 1 `ExecutionEngineInstance` coverage:** + + | Method | Protocol message | Notes | + |--------|-----------------|-------| + | `markdownForFile(file)` | `MarkdownForFile` → `MarkdownForFileResult` | Non-QMD files only (percent scripts, etc.). For QMD files, q2 handles parsing directly. | + | `target(file, quiet, md)` | **Harness-internal** | Not a protocol message or Rust trait method. The harness checks if the TS engine implements `target()`, calls it if so, uses the result (including opaque `data` cookie) to build `ExecutionTarget` for `execute()`. All Deno-side — q2 never sees target() results. If engine doesn't implement it, harness constructs from `TsExecuteOptions` fields. | + | `partitionedMarkdown(file, fmt)` | `PartitionedMarkdown` → `PartitionedMarkdownResult` | **Optional, also on Rust `ExecutionEngine` trait.** Needed for ipynb-filter YAML harvest and project indexing. Default impl: `partition(markdown_for_file(file).value)`. Jupyter overrides for ipynb-filter support. See [ipynb-filters research plan](2026-04-23-ipynb-filters-and-engine-partitioning.md). 
| + | `filterFormat(src, opts, fmt)` | `FilterFormat` → `FilterFormatResult` | Optional; format typed as `TsFormatInfo` | + | `execute(options)` | `Execute` → `ExecuteResult` | Core execution | + | `executeTargetSkipped(tgt, fmt)` | `ExecuteTargetSkipped` → `ExecuteTargetSkippedResult` | Notification, void return | + | `dependencies(options)` | `Dependencies` → `DependenciesResult` | Resolve widget/JS deps | + | `postprocess(options)` | `Postprocess` → `PostprocessResult` | HTML preservation restore, etc. | + | `canKeepSource(target)` | `CanKeepSource` → `CanKeepSourceResult` | Simple boolean query | + | `intermediateFiles(input)` | `IntermediateFiles` → `IntermediateFilesResult` | File list query | + | `run(options)` | **Not included** | Interactive mode — fundamentally different (long-running, not request/response). Defer to a future plan. | + | `postRender(file)` | `PostRender` → `PostRenderResult` | Post-render hook | +- [ ] Define `EngineHostContext` struct. This is a q2 invention (Quarto 1 engines + run in-process and don't need serialized context). It carries only static/global + and project-level information — per-document and per-format info arrives in + per-call messages like `TsExecuteOptions`. See the Protocol Data Types appendix + for the full struct definition. +- [ ] Define protocol data types — all strongly typed, no `serde_json::Value`. + Every field that crosses the protocol boundary has a defined Rust type. + See the **Protocol Data Types** appendix at the end of this file for the full + struct definitions. +- [ ] Write unit tests for serialization/deserialization round-trips. One test per message + type — each test constructs the Rust struct, serializes to JSON, and verifies the JSON + shape matches what the Deno side expects. Then deserializes back and checks equality. 
+ + **Message envelope tests** (verify `type` tag and camelCase field names): + - Test each `ToEngine` variant: `Init`, `Shutdown`, `ClaimsLanguage`, `ClaimsFile`, + `MarkdownForFile`, `PartitionedMarkdown`, `Execute`, `Dependencies`, + `Postprocess`, `PostRender`, `CanKeepSource`, `IntermediateFiles`, + `FilterFormat`, `ExecuteTargetSkipped` + - Test each `FromEngine` variant: `Ready`, `Error`, `ClaimsLanguageResult`, + `ClaimsFileResult`, `MarkdownForFileResult`, `PartitionedMarkdownResult`, + `ExecuteResult`, `DependenciesResult`, `PostprocessResult`, + `PostRenderResult`, `CanKeepSourceResult`, `IntermediateFilesResult`, + `FilterFormatResult`, `ExecuteTargetSkippedResult` + + **Data type round-trip tests:** + - `EngineMeta` — all fields populated + - `EngineHostContext` — with and without project_dir + - `TsMappedStringWithMap` — with and without file_name, with source_map entries + - `TsSourceMapEntry` — verify serialization of byte-range pieces + - `TsFormatInfo` — with categorized HashMap sections populated + - `TsFormatIdentifier` — all fields + - `TsPartitionedMarkdown` — all fields populated, with and without yaml/heading + - `TsExecutionTarget` — with nested `TsMappedString` and `TsMetadataValue` map + - `TsMetadataValue` — each variant (String, Bool, Number, Array, Map, Null) + - `TsExecuteOptions` — verify metadata map and source_map serialization + - `TsExecuteResult` — all optional fields present, then all absent + - `TsWidgetDependency` — scripts with/without attribs + - `TsPandocIncludes` — all three include locations + - `TsDependenciesOptions` / `TsDependenciesResult` + - `TsPostProcessOptions` — with and without preserve map + - `TsRenderResultFile` — with and without supporting files + - `TsRenderOptions` / `TsPandocFlags` + - `TsPandocAttr` — with classes and keyvalue pairs + + **Error handling tests:** + - Malformed JSON → clear parse error + - Unknown `type` tag → clear "unknown message" error + - Missing required field → clear serde error 
with field name + - Wrong type for a field (e.g., string where bool expected) → clear error +- [ ] Define `TsExecuteOptions` — this bridges q2's API to Quarto 1's API: + + **q2 side:** `ExecutionEngine::execute(input: &str, ctx: &ExecutionContext)` receives a QMD string (serialized from the AST after include expansion) and a context with SourceInfo (from Plan 0). + + **Quarto 1 side:** The TS engine expects `ExecuteOptions` containing: + - `target: ExecutionTarget` — `{ source, input, markdown: MappedString, metadata }` + - `format: Format` — nested object with `pandoc.to`, `execute.*` (daemon, cache), figure options, etc. + - `resourceDir`, `tempDir`, `libDir`, `projectDir`, `cwd`, `params`, `quiet` + + The Deno harness bridges this: it receives `TsExecuteOptions` from q2, wraps the QMD text as a `MappedString`, and constructs the `ExecutionTarget` and `Format` objects the engine expects. Unlike the original plan, the harness does NOT need to call `quarto.markdownRegex.extractYaml()` — q2 provides the pre-extracted metadata directly. + + `TsExecuteOptions` should include: + ```rust + struct TsExecuteOptions { + input: String, // QMD text (serialized from AST) + source_path: String, // original file path + metadata: HashMap, // pre-extracted from AST by q2 + format: TsFormatInfo, // typed format (defined above) + temp_dir: String, + cwd: String, + project_dir: Option, + lib_dir: Option, + quiet: bool, + dependencies: bool, // whether to resolve deps inline + handled_languages: Vec, // languages handled by cell handlers + params: Option>, + // Byte-range source map from Plan 0's SourceInfo::Concat, + // flattened. Maps byte ranges in `input` back to byte ranges + // in original source files (through include expansion). + // Always provided by q2. The engine-host harness uses this + // to construct a proper MappedString with provenance — the + // same semantics as Quarto 1's in-process MappedString, but + // serialized across the protocol boundary. 
+ source_map: Vec, + } + + struct TsSourceMapEntry { + start: usize, // byte offset in serialized QMD + length: usize, // byte length of this piece + file: String, // original source file path + file_offset: usize, // byte offset in the original file + } + ``` + + `TsFormatInfo` (defined in the protocol data types appendix) uses categorized + `HashMap` sections (execute, render, pandoc, metadata). + q2 constructs this by extracting keys from the merged `ConfigValue` metadata + using Quarto 1's key classification lists (kExecuteDefaultsKeys, etc.). The Deno + harness maps `TsFormatInfo` to Quarto 1's `Format` interface so the engine sees + familiar field names — the mapping is trivial since the section structure already + matches. Any new config key automatically flows through without protocol changes. + + Fields used by the Julia engine (our validation target): + - `format.execute["daemon"]`, `["fig-format"]`, `["fig-dpi"]` + - `format.render["keep-hidden"]`, `["fig-pos"]`, `["produce-source-notebook"]` + - `format.pandoc["to"]` + - The whole `format.execute` map (passed to `jupyter.toMarkdown`) + - The whole `format.pandoc` map (passed to format detection helpers) + + See `~/src/quarto-cli/src/resources/extension-subtrees/julia-engine/src/julia-engine.ts`. + +### Phase 2: Shared subprocess management + +Spawn and manage the shared Deno subprocess that hosts all TS engine extensions. + +- [ ] Define `EngineTransport` trait in `crates/quarto-core/src/engine/ts_process.rs`: + ```rust + /// Transport abstraction for the engine subprocess protocol. + /// Currently only StdioTransport exists. A future WebSocketTransport + /// would enable running the engine host as a standalone server + /// (e.g., for WASM hub-client needing filesystem/process access). 
+ pub trait EngineTransport: Send { + fn send(&mut self, msg: &ToEngine) -> Result<()>; + fn recv(&mut self) -> Result; + fn shutdown(&mut self) -> Result<()>; + } + ``` + +- [ ] Implement `StdioTransport`: + ```rust + pub struct StdioTransport { + child: std::process::Child, + stdin: BufWriter, + stdout: BufReader, + } + impl EngineTransport for StdioTransport { /* JSON lines over stdio */ } + ``` + +- [ ] Create `TsEngineHost` — the shared subprocess manager: + ```rust + pub struct TsEngineHost { + transport: Mutex>>, + } + + impl TsEngineHost { + pub fn new() -> Self; + /// Ensure the subprocess is running. Lazily spawns on first call. + pub fn ensure_started(&self) -> Result<()>; + /// Load an engine into the running subprocess. Sends Init, + /// receives Ready with EngineMeta. Can be called multiple times + /// for different engines. + pub fn init_engine(&self, engine_path: &Path, ctx: &EngineHostContext) + -> Result; + /// Send a message routed to a specific engine (by name). + pub fn send(&self, msg: &ToEngine) -> Result<()>; + /// Receive the next response. + pub fn recv(&self) -> Result; + /// Shut down the entire subprocess (all engines). + pub fn shutdown(&self) -> Result<()>; + } + ``` + The `TsEngineHost` is **shared across all TS engines** in a project render. + It is owned by the `EngineRegistry` (in `StageContext`) as an + `Arc`. Each `TsEngine` holds a clone of the `Arc`. + +- [ ] Spawn Deno with: `deno run --allow-all ` + - `--allow-all` because engine extensions need file/net/process access + - Consider more granular permissions later +- [ ] Handle Deno not being installed: check PATH, clear error message +- [ ] Handle process crashes: detect unexpected EOF on stdout, report error +- [ ] Handle timeouts: execution timeout defaults to 5 minutes. Configurable via + `execute.timeout` in `_quarto.yml` or document frontmatter (in seconds). + The timeout applies to individual `execute` calls, not to the subprocess lifetime. 
+  Discovery queries (`claimsLanguage`, `claimsFile`) use a shorter fixed timeout (10s).
+- [ ] Forward stderr to q2's log output in real-time (spawn a reader thread or use async)
+- [ ] Write test: spawn a simple Deno script, send/receive multiple messages on the same process
+
+### Phase 3: ExecutionEngine trait — discovery + full lifecycle methods
+
+Extend the `ExecutionEngine` trait with discovery methods AND the full
+`ExecutionEngineInstance` lifecycle. This enables ALL engines (built-in and TS
+extensions) to participate in the same claiming system and execution pipeline.
+
+**Quarto 1 references:**
+- `ExecutionEngineDiscovery` in `src/execute/types.ts` — discovery interface
+- `ExecutionEngineInstance` in `src/execute/types.ts` — full lifecycle interface
+
+- [ ] Add **discovery methods** to `ExecutionEngine` trait with defaults:
+  ```rust
+  fn valid_extensions(&self) -> Vec<String> { Vec::new() }
+  fn claims_language(&self, _language: &str, _first_class: Option<&str>) -> Option<i32> { None }
+  fn claims_file(&self, _file: &str, _ext: &str) -> bool { false }
+  ```
+
+- [ ] Add **file conversion method** with default:
+  ```rust
+  /// Convert a non-QMD file to QMD text. Called only for files this
+  /// engine claimed via `claims_file`. For QMD files, q2 handles
+  /// parsing directly and this method is never called.
+  ///
+  /// Returns (qmd_text, optional_filename_for_source_tracking).
+  fn markdown_for_file(&self, _file: &Path) -> Result<(String, Option<String>), ExecutionError> {
+      Err(ExecutionError::NotSupported("markdown_for_file"))
+  }
+  ```
+
+  **Not on Rust trait** (protocol-only for TS engines):
+  - `target()` — q2 constructs execution target data from its AST. TS
+    engines may implement it for Quarto 1 API compat (transient notebooks,
+    kernelspec). The harness builds the `ExecutionTarget` from
+    `TsExecuteOptions` fields when the engine doesn't implement it.
+
+- [ ] Define `PartitionedMarkdown` Rust struct (in `ts_protocol.rs` or a shared types module):
+  ```rust
+  /// A file's markdown split into structured parts.
+  /// Rust equivalent of Quarto 1's PartitionedMarkdown.
+  pub struct PartitionedMarkdown {
+      /// Parsed YAML frontmatter (None if no frontmatter present).
+      pub yaml: Option<ConfigValue>,
+      /// Text of the first heading (None if no heading).
+      pub heading_text: Option<String>,
+      /// Pandoc attributes of the first heading (id, classes, key-values).
+      pub heading_attr: Option<TsPandocAttr>,
+      /// Whether the document contains crossref references.
+      pub contains_refs: bool,
+      /// The markdown body (after yaml block and first heading).
+      pub markdown: String,
+      /// Full markdown text with the yaml block removed.
+      pub src_markdown_no_yaml: String,
+  }
+  ```
+  `yaml` uses `ConfigValue` (q2-native) rather than `HashMap<String, TsMetadataValue>`
+  (protocol type). `TsEngine` converts at the boundary: `TsPartitionedMarkdown` →
+  `PartitionedMarkdown` (converting `TsMetadataValue` → `ConfigValue` for `yaml`).
+
+- [ ] Add **partitioned markdown method** to the trait with default:
+  ```rust
+  /// Partition a file's markdown into yaml/heading/body.
+  /// Intended default: calls markdown_for_file then partitions the result.
+  /// Jupyter will override to run ipynb-filters when format is provided.
+  /// See ipynb-filters research plan for full details.
+  fn partitioned_markdown(&self, _file: &Path, _format: Option<&TsFormatInfo>)
+      -> Result<PartitionedMarkdown, ExecutionError> {
+      todo!("partition_markdown not yet implemented — see ipynb-filters research plan R2")
+  }
+  ```
+  The default impl uses `todo!()` because the `partition_markdown()` utility
+  function is deferred to the ipynb-filters research plan (R2). No callers
+  exist yet in q2's pipeline. `TsEngine` never hits this default — it
+  forwards to the subprocess if the engine reports `has_partitioned_markdown`,
+  and falls back to the harness-side `partition(markdownForFile(file).value)`
+  otherwise.
+
+- [ ] Add **post-execute lifecycle methods** with defaults:
+  ```rust
+  fn filter_format(&self, _source: &str, _options: &TsRenderOptions,
+      format: TsFormatInfo) -> Result<TsFormatInfo, ExecutionError> {
+      Ok(format) // default: pass through unchanged
+  }
+
+  fn execute_target_skipped(&self, _target: &TsExecutionTarget,
+      _format: &TsFormatInfo) -> Result<(), ExecutionError> {
+      Ok(())
+  }
+
+  fn dependencies(&self, _options: &TsDependenciesOptions)
+      -> Result<TsDependenciesResult, ExecutionError> {
+      Ok(TsDependenciesResult::default())
+  }
+
+  fn postprocess(&self, _options: &TsPostProcessOptions) -> Result<(), ExecutionError> {
+      Ok(())
+  }
+
+  fn can_keep_source(&self, _target: &TsExecutionTarget) -> bool { true }
+
+  fn post_render(&self, _file: &TsRenderResultFile) -> Result<(), ExecutionError> {
+      Ok(())
+  }
+  ```
+
+  Note: Methods that q2's pipeline doesn't call yet still get trait definitions
+  and protocol messages so that (a) TsEngine can forward them, (b) we have
+  thorough unit tests, and (c) future pipeline work can call them without
+  protocol changes.
+
+- [ ] Implement on built-in engines:
+  - **JupyterEngine**: `valid_extensions() → [".ipynb"]`, `claims_file` for `.ipynb` and percent scripts. No `claims_language` overrides (returns `None` for all languages, matching Quarto 1 where Python also relied on the Phase 4 fallback). **Deliberate q2 interface change:** Jupyter no longer claims "julia" explicitly (Quarto 1 did this as a backward-compatibility hack), removing the priority conflict with the Julia extension. Jupyter still handles all unclaimed computational languages via the Phase 4 fallback, so `{julia}` blocks without the Julia extension still work via Jupyter's kernel.
+  - **KnitrEngine**: `claims_language("r") → Some(1)`, `valid_extensions() → [".rmd", ".rmarkdown"]`, `claims_file` for `.rmd`/`.rmarkdown`
+  - **MarkdownEngine**: returns defaults (claims nothing)
+  - Built-in engines use the default implementations for the lifecycle methods
+    (they have their own native implementations that don't go through the protocol).
+- [ ] Update `EngineMeta` (from init response) to include `validExtensions: Vec<String>` so TsEngine can implement `valid_extensions()`
+- [ ] Write tests for built-in engine claiming
+- [ ] Write tests for default lifecycle method behavior (NotSupported errors, pass-throughs)
+
+### Phase 4: TsEngine struct
+
+The Rust struct that implements `ExecutionEngine` by delegating to the shared subprocess.
+
+- [ ] Create `crates/quarto-core/src/engine/ts_engine.rs`:
+  ```rust
+  pub struct TsEngine {
+      name: String,
+      bundle_path: PathBuf,            // Path to the bundled .js file
+      host: Arc<TsEngineHost>,         // Shared subprocess (from EngineRegistry)
+      engine_meta: Option<EngineMeta>, // Cached after init
+      initialized: AtomicBool,         // Whether Init has been sent for this engine
+      // Static hints from _extension.yml (see Plan 1c).
+      // Used to skip launching the subprocess when the engine is
+      // clearly irrelevant. Empty = no hint, always consult dynamically.
+      language_hints: Vec<String>,
+      file_extension_hints: Vec<String>,
+  }
+  ```
+  `TsEngine` does NOT own the subprocess — it shares `TsEngineHost` with other
+  TS engines via `Arc`. The `Mutex` is inside `TsEngineHost`, not `TsEngine`.
+  `Send + Sync` is satisfied because `Arc<TsEngineHost>` is `Send + Sync` and
+  `AtomicBool` is lock-free.
+
+- [ ] Implement lifecycle methods (not part of `ExecutionEngine` trait — called by the project render orchestration):
+  - `ensure_initialized(&self)` — ensures the shared subprocess is running
+    (calls `host.ensure_started()`) AND this engine is loaded (calls
+    `host.init_engine()` if not yet initialized). Called before discovery
+    queries or execution.
+ - Shutdown is on `TsEngineHost`, not per-engine. Called at end of project + render by the `EngineRegistry` (which owns the `Arc`). + +- [ ] Implement `ExecutionEngine` trait — all methods delegate to the shared + subprocess via protocol messages. Each method calls `ensure_initialized()` + first (which ensures both the subprocess and this engine are ready): + + **Existing trait methods:** + - `name()` → `self.name` (from `EngineMeta`, no subprocess call) + - `execute(input, ctx)` → send `Execute`, recv `ExecuteResult`, convert to q2's `ExecuteResult` + - `can_freeze()` → from `self.engine_meta` (no subprocess call) + - `is_available()` → check Deno in PATH + bundle file exists (no subprocess call) + - `intermediate_files(input_path)` → send `IntermediateFiles`, recv result + + **Discovery methods (defined in Phase 3 above):** + - `valid_extensions()` → from `EngineMeta` (no subprocess call, cached from init) + - `claims_language(language, first_class)` → **static pre-filter first**: if + `language_hints` is non-empty and language isn't in the list, return `None` + without touching the subprocess. Otherwise, send `ClaimsLanguage`, recv + `ClaimsLanguageResult`. Harness converts JS `false` → `null`, `true` → `1`, + number → `Math.trunc()` to `i32`. Negative values allowed. + - `claims_file(file, ext)` → **static pre-filter first**: if + `file_extension_hints` is non-empty and ext isn't in the list, return `false` + without touching the subprocess. Otherwise, send `ClaimsFile`, recv + `ClaimsFileResult`. + - Cache `claims_language` results: deterministic, so cache `(language, first_class) → result` + + **File conversion (defined in Phase 3 above):** + - `markdown_for_file(file)` → send `MarkdownForFile`, recv `MarkdownForFileResult`. + Called only for non-QMD files claimed via `claims_file`. For QMD input, this + method is never called — q2 handles parsing directly. 
+ - `partitioned_markdown(file, format)` → send `PartitionedMarkdown`, recv + `PartitionedMarkdownResult` if engine reports `has_partitioned_markdown`. + Otherwise, use the Rust-side default (which is `todo!()` for now — + no callers exist yet in q2's pipeline). + + **Post-execute lifecycle methods (defined in Phase 3 above):** + - `filter_format(source, options, format)` → send `FilterFormat`, recv result. Optional — default impl returns format unchanged. + - `dependencies(options)` → send `Dependencies`, recv `DependenciesResult` + - `postprocess(options)` → send `Postprocess`, recv `PostprocessResult` + - `post_render(file)` → send `PostRender`, recv `PostRenderResult`. Optional. + - `can_keep_source(target)` → send `CanKeepSource`, recv `CanKeepSourceResult` + - `execute_target_skipped(target, format)` → send `ExecuteTargetSkipped`, recv result + + Note: `run()` is excluded from the protocol — it's fundamentally different (long-running + interactive mode, not request/response). Deferred to a future plan. + +- [ ] Wire into engine module (`engine/mod.rs`): add `ts_engine`, `ts_process`, + `ts_protocol` modules behind `#[cfg(not(target_arch = "wasm32"))]` (same gate + as knitr/jupyter). Re-export `TsEngine` and `TsEngineHost` from `engine/mod.rs`. +- [ ] The `Mutex` lives inside `TsEngineHost` (on the transport), not in `TsEngine`. + `Mutex` (not `RwLock`) is correct since every operation needs exclusive access + (both reads and writes go through the same transport). The subprocess is + inherently single-threaded — the protocol is request-response over a single + channel. All TS engines serialize through the same Mutex. This matches + Quarto 1's behavior (engines process one file at a time). +- [ ] Write test with a mock engine (echo engine — see Plan 1c Phase 3). + A stub Deno harness is sufficient for smoke-testing the Rust side in + isolation; the full harness is Plan 1b. 
+ +## Design Notes + +### Shared subprocess + +All TS engine extensions share **one Deno subprocess per project render**. +The subprocess is spawned lazily on first need (any TS engine's discovery +query or execute call). Each engine is loaded into the subprocess via a +separate `Init` message. The subprocess is shut down at the end of the +project render. + +This avoids spawning N Deno processes — important because Julia is bundled, +so anyone with an additional TS engine extension has at least two. Each +Deno process costs ~100-200ms startup and ~30-50MB memory. + +All protocol messages carry an `engine: String` field for routing to the +correct engine within the subprocess. The harness maintains a +`Map` internally. + +The lifecycle is managed by `TsEngineHost` (owned by the `EngineRegistry` +in `StageContext`). Individual `TsEngine` structs hold `Arc` +and their engine name. + +### Transport abstraction + +The `EngineTransport` trait abstracts the communication channel between +q2 and the engine-host subprocess. Currently only `StdioTransport` exists +(JSON lines over stdin/stdout of a child process). + +**Future direction — engine server:** The protocol is already +transport-agnostic (self-contained JSON messages). A future +`WebSocketTransport` would enable running the engine-host as a standalone +server. Use case: WASM hub-client running in a browser needs to execute +code via a TS engine, but can't spawn processes or access the filesystem. +A locally-running engine server with full OS access could serve this role. +Each WebSocket connection would be its own session (no request-ID +multiplexing needed). This requires no protocol changes — only a new +transport implementation and a way to start/discover the server. + +### Stderr handling (Rust side) + +The subprocess's stderr is forwarded to q2's logging. 
The harness +(Plan 1b) writes level-prefixed log lines (`[INFO]`, `[WARN]`, `[ERROR]`); +the Rust side parses those prefixes and routes to the appropriate log +level. Unprefixed stderr lines are logged at INFO. + +### Error categories and handling + +Following Quarto 1's approach: **errors propagate up, render fails, user sees the +message.** No silent recovery, no engine removal from the registry on failure. + +1. **Deno not found** — `is_available()` returns false before any subprocess call. + Clear error: "Deno is required for the {name} engine extension but was not found + in PATH." Render fails. +2. **Engine module load failure** — Subprocess starts but `init` message gets back an + `Error` response instead of `Ready`. Fatal for this engine. Forward the TS-side + error message (import failure, missing exports) to the user. Render fails. +3. **Discovery errors** — `claimsLanguage`/`claimsFile` throw inside the engine. + Subprocess sends `Error` response. Propagate as `ExecutionError`. Render fails. +4. **Execution failure** — `Error` response during execution. Forward the message and + optional stack trace. Matches Quarto 1 behavior. +5. **Process crash** — EOF on stdout, child process exited unexpectedly. No Quarto 1 + equivalent (in-process engines can't crash independently). Generate an + `ExecutionError` with the exit code and any stderr output captured so far. +6. **Timeout** — Execution exceeds configured limit. Kill the subprocess, report + timeout error. (Quarto 1's Julia engine handles timeouts internally; in q2, the + Rust side enforces the timeout since it controls the subprocess.) +7. **Malformed protocol** — Subprocess sends invalid JSON or unexpected message type. + This is a bug in the engine-host or engine, not a user error. Report clearly with + the raw message content for debugging. + +### Stdout/stderr contract (Rust side) + +**Stdout is exclusively for JSON protocol messages**, one per line. 
On +the Rust side, if a line from stdout fails to parse as JSON, report a +clear error: "Engine wrote non-protocol output to stdout. Engine +extensions must use stderr for diagnostics." See Plan 1b for the +corresponding harness-side contract (`console.*` overrides, stdout +redirection, etc.). + +### Bundle embedding + +The harness bundle (produced by Plan 1b) is embedded in the q2 binary via +`include_str!("../../ts-packages/quarto-engine-host-deno/dist/engine-host-deno.js")` +in `ts_process.rs`, gated behind `#[cfg(not(target_arch = "wasm32"))]`. +At runtime, the embedded string is written to a temp file and executed +with `deno run --allow-all `. Bundle-size considerations and +build pipeline details are in Plan 1b. + +## Success Criteria + +- [ ] Can spawn a Deno subprocess, send/receive JSON messages +- [ ] `TsEngine` implements `ExecutionEngine` (discovery, execute, post-execute, file conversion) and delegates to subprocess +- [ ] Built-in engines (knitr, jupyter) implement `claims_language` and `claims_file` +- [ ] Protocol carries full `TsExecuteResult` (includes, preserve, postProcess, engineDependencies, pandoc) +- [ ] Deno-not-installed case produces a clear error message +- [ ] Tests requiring Deno are skipped if Deno is absent. Use the same pattern + as pandoc tests: a runtime `has_deno()` helper that checks PATH, and tests + that need Deno call it and return early (effectively skipping) if absent. + No `#[ignore]` attribute — tests run but gracefully degrade. +- [ ] All existing tests pass (no regressions) +- [ ] All protocol message types have serialization round-trip tests +- [ ] (Harness dispatching is Plan 1b's success criterion, not this plan's) + +## Appendix: Protocol Data Types + +All strongly typed — no `serde_json::Value`. 
+
+```rust
+// === Shared types ===
+
+struct EngineMeta {
+    name: String,
+    can_freeze: bool,
+    generates_figures: bool,
+    valid_extensions: Vec<String>,
+    has_partitioned_markdown: bool, // engine implements partitionedMarkdown()
+    // Note: target() is harness-internal — the harness detects it by
+    // checking the loaded engine module directly, no EngineMeta flag needed.
+}
+
+// === Engine host context (sent once at init) ===
+//
+// q2 invention — Quarto 1 engines run in-process and don't need this.
+// Carries only static/global and project-level info. Per-document and
+// per-format info arrives in per-call messages (TsExecuteOptions, etc.).
+struct EngineHostContext {
+    // Project info (→ EngineProjectContext for launch())
+    project_dir: Option<String>,
+    is_single_file: bool,
+
+    // Paths for QuartoAPI construction
+    resource_dir: String, // q2's bundled resources
+    runtime_dir: String,  // q2's runtime directory
+    pandoc_path: String,  // absolute path to pandoc binary
+
+    // System info for QuartoAPI
+    is_interactive_session: bool,
+    running_in_ci: bool,
+    quarto_version: String,
+}
+
+// Simple string with optional file attribution. Used in protocol messages
+// where source provenance tracking is not needed (e.g., TsExecutionTarget.markdown
+// in query messages like CanKeepSource, Dependencies).
+struct TsMappedString {
+    value: String,
+    file_name: Option<String>,
+}
+
+// Extended form used in MarkdownForFileResult (non-QMD file conversion).
+// Includes source_map so that positions in the generated QMD can be
+// traced back to the original file (e.g., .jl percent script).
+// The Rust side converts source_map entries to SourceInfo::Concat
+// and attaches it to the parsed AST.
+struct TsMappedStringWithMap {
+    value: String,
+    file_name: Option<String>,
+    source_map: Vec<TsSourceMapEntry>,
+}
+
+struct TsPandocIncludes {
+    in_header: Option<Vec<String>>,
+    before_body: Option<Vec<String>>,
+    after_body: Option<Vec<String>>,
+}
+
+struct TsPandocAttr {
+    id: String,
+    classes: Vec<String>,
+    keyvalue: Vec<(String, String)>,
+}
+
+// === Format info ===
+//
+// Uses categorized HashMaps rather than per-field structs. q2 extracts
+// the merged ConfigValue into sections using Quarto 1's key lists
+// (kExecuteDefaultsKeys, kRenderDefaultsKeys, kPandocDefaultsKeys).
+// This matches Quarto 1's nested Format shape so the harness mapping
+// is trivial, doesn't require per-field extraction in Rust, and
+// automatically forwards any config key — if a future engine reads
+// an obscure field like `execute.plotly-connected`, it just works.
+struct TsFormatInfo {
+    identifier: TsFormatIdentifier,
+    execute: HashMap<String, TsMetadataValue>,  // execute.* keys
+    render: HashMap<String, TsMetadataValue>,   // render.* keys
+    pandoc: HashMap<String, TsMetadataValue>,   // pandoc.* keys
+    metadata: HashMap<String, TsMetadataValue>, // everything else
+}
+
+struct TsFormatIdentifier {
+    base_format: String,
+    target_format: String,
+    display_name: String,
+}
+
+// === Execution target ===
+
+struct TsExecutionTarget {
+    source: String,
+    input: String,
+    markdown: TsMappedString,
+    metadata: HashMap<String, TsMetadataValue>,
+}
+
+#[serde(untagged)]
+enum TsMetadataValue {
+    String(String),
+    Bool(bool),
+    Number(f64),
+    Array(Vec<TsMetadataValue>),
+    Map(HashMap<String, TsMetadataValue>),
+    Null,
+}
+
+// === Source map (Plan 0 → Plan 1a bridge) ===
+
+// Byte-range entry from q2's flattened SourceInfo::Concat.
+// Used in both directions: +// - Rust→Deno: TsExecuteOptions.source_map (maps QMD text to originals) +// - Deno→Rust: TsMappedStringWithMap.source_map (maps markdownForFile +// output back to the original non-QMD file) +// +// Flattening is done on the Rust side for Rust→Deno: +// - SourceInfo::Original → resolve FileId to path via SourceContext +// - SourceInfo::Substring → walk parent chain to Original +// - SourceInfo::FilterProvenance → emit with empty file string (sentinel) +// - SourceInfo::Concat (nested) → flatten recursively +// +// On the Deno side for Deno→Rust (markdownForFile): +// - Walk the MappedString output, call .map() to find contiguous ranges +// mapping to the same file with sequential offsets, emit entries +// +// The `file` field is a path string (not numeric ID) — IDs are resolved +// on the Rust side since the Deno process doesn't have SourceContext. +struct TsSourceMapEntry { + start: usize, // byte offset in serialized QMD + length: usize, // byte length of this piece + file: String, // original source file path (empty = unmappable) + file_offset: usize, // byte offset in the original file +} + +// === Optional instance method results === + +// Returned from partitionedMarkdown() — file split into parts. +// Also on the Rust ExecutionEngine trait (Jupyter needs it for ipynb-filters). +// target() is harness-internal — its result type lives only in the TS harness. 
+struct TsPartitionedMarkdown {
+    yaml: Option<HashMap<String, TsMetadataValue>>,
+    heading_text: Option<String>,
+    heading_attr: Option<TsPandocAttr>,
+    contains_refs: bool,
+    markdown: String,
+    src_markdown_no_yaml: String,
+}
+
+// === Execute result ===
+
+struct TsExecuteResult {
+    markdown: String,
+    supporting: Vec<String>,
+    filters: Vec<String>,
+    includes: Option<TsPandocIncludes>,
+    post_process: Option<bool>,
+    preserve: Option<HashMap<String, String>>,
+    engine_dependencies: Option<HashMap<String, Vec<TsMetadataValue>>>,
+    pandoc: Option<TsMetadataValue>,
+}
+
+struct TsWidgetDependency {
+    name: String,
+    version: String,
+    scripts: Vec<TsWidgetScript>,
+    stylesheets: Vec<TsWidgetScript>,
+}
+
+struct TsWidgetScript {
+    path: Option<String>,
+    attribs: Option<HashMap<String, String>>,
+    after_body: Option<bool>,
+}
+
+// === Dependencies ===
+
+struct TsDependenciesOptions {
+    target: TsExecutionTarget,
+    format: TsFormatInfo,
+    output: String,
+    resource_dir: String,
+    temp_dir: String,
+    project_dir: Option<String>,
+    lib_dir: Option<String>,
+    dependencies: Option<Vec<TsMetadataValue>>,
+    quiet: bool,
+}
+
+struct TsDependenciesResult {
+    includes: TsPandocIncludes,
+}
+
+// === Post-process ===
+
+struct TsPostProcessOptions {
+    target: TsExecutionTarget,
+    format: TsFormatInfo,
+    output: String,
+    temp_dir: String,
+    project_dir: Option<String>,
+    preserve: Option<HashMap<String, String>>,
+    quiet: bool,
+}
+
+// === Post-render ===
+
+struct TsRenderResultFile {
+    input: String,
+    markdown: String,
+    format: TsFormatInfo,
+    file: String,
+    supporting: Option<Vec<String>>,
+    resource_files: Vec<String>,
+}
+
+// === Render options (for filterFormat) ===
+
+struct TsRenderOptions {
+    services_temp_dir: String,
+    flags: TsPandocFlags,
+    quiet: bool,
+}
+
+struct TsPandocFlags {
+    to: Option<String>,
+    output: Option<String>,
+    quiet: Option<bool>,
+}
+```
+
+**Design principle:** No `serde_json::Value` in the protocol. Every field is
+typed so that (a) unit tests can construct values without raw JSON strings,
+(b) the Rust compiler catches field mismatches, and (c) the Deno-side
+`types.ts` has a clear schema to match against.
+
+`TsFormatInfo` uses categorized `HashMap<String, TsMetadataValue>` sections
+(execute, render, pandoc, metadata) rather than per-field structs.
This matches +Quarto 1's nested `Format` shape, automatically forwards any config key, and +avoids maintaining a Rust struct with 100+ optional fields. q2 extracts keys +from the merged `ConfigValue` metadata into the correct section using Quarto 1's +key classification lists. The `TsMetadataValue` enum covers all JSON value +types so nothing is lost, but it's still a proper Rust type rather than raw +`serde_json::Value`. + +`TsExecuteResult` maps to q2's `ExecuteResult`. Fields `preserve`, +`engine_dependencies`, and `pandoc` must also be added to q2's `ExecuteResult` +struct (currently only has `includes` and `needs_postprocess`). + +**Type mapping to q2:** +- `TsPandocIncludes` ↔ q2's `PandocIncludes` (simple field rename) +- `TsMetadataValue` ↔ q2's `ConfigValue` (convert at the boundary) +- `TsFormatInfo` is protocol-only; q2 constructs it from its merged metadata +- `TsWidgetDependency` is new — will need a q2-side type when widget support is built diff --git a/claude-notes/plans/2026-04-16-plan1b-engine-host-deno.md b/claude-notes/plans/2026-04-16-plan1b-engine-host-deno.md new file mode 100644 index 000000000..560178f10 --- /dev/null +++ b/claude-notes/plans/2026-04-16-plan1b-engine-host-deno.md @@ -0,0 +1,284 @@ +# Plan 1b: @quarto/engine-host-deno (Deno harness) + +**Grand plan:** [2026-04-16-ts-engine-extensions-subprocess.md](2026-04-16-ts-engine-extensions-subprocess.md) +**Depends on:** Plan 1a (Rust core: protocol types, `TsEngine`) — this +plan needs the frozen JSON protocol schema from Plan 1a Phase 1. Strictly +speaking, only the schema gates 1b; the rest of 1a (subprocess management, +trait extensions, `TsEngine` struct) runs in parallel with 1b if two +people are working. +**Blocks:** Plan 1c (extension integration + E2E echo test), Plan 2 Phase 2C +(wire QuartoAPI namespaces into the harness), Plan 3 Phase 3E (wire jupyter +into the harness), Plan 4 (Julia validation). 
+**Estimated sessions:** 1 + +## Overview + +Build the Deno-side subprocess harness — the TypeScript package that +receives JSON protocol messages on stdin, dispatches to a loaded engine +module, and writes typed responses to stdout. This is the counterpart to +Plan 1a's Rust-side subprocess manager. + +**Build model:** Following the existing `quarto-system-runtime` pattern (see `crates/quarto-system-runtime/js/`): +1. Source lives in `ts-packages/quarto-engine-host-deno/src/` +2. **esbuild** bundles it into a single `dist/engine-host-deno.js` (checked into git) +3. Rust embeds it via `include_str!("../../ts-packages/quarto-engine-host-deno/dist/engine-host-deno.js")` + in `ts_process.rs` (behind `#[cfg(not(target_arch = "wasm32"))]` with the rest of the module) +4. At runtime, writes the embedded JS to a temp file and runs `deno run --allow-all ` +5. Only developers editing the TS harness need to rebuild (via `npm run build` in the package) + +## Phase order + +Phase 1 → Phase 2 → Phase 3 → Phase 4 + +## Work Items + +### Phase 1: Package setup + esbuild + +- [ ] Create `ts-packages/quarto-engine-host-deno/package.json`: + ```json + { + "name": "@quarto/engine-host-deno", + "version": "0.1.0", + "type": "module", + "main": "src/host.ts", + "scripts": { + "build": "node esbuild.config.mjs" + } + } + ``` +- [ ] Create `esbuild.config.mjs` — bundle `src/host.ts` → `dist/engine-host-deno.js`. + Use `platform: "neutral"` and `format: "esm"` (NOT the `platform: "browser"` / + `format: "iife"` pattern from `quarto-system-runtime` — that targets QuickJS via + Boa, while engine-host-deno targets Deno which runs ES modules and has its own globals + like `Deno.stdout`, `Deno.Command`) +- [ ] Add `@quarto/api` and `@quarto/types` as dependencies. At this point + in the sequence, `@quarto/api` may still be a skeleton (Plan 2A) with + stubs for most namespaces — that's fine; see Phase 3. 
+ +### Phase 2: `host.ts` main loop + +- [ ] Create `src/host.ts`: + ```typescript + // Redirect stdout so engine code can't accidentally corrupt the protocol + const protocolOut = Deno.stdout; + // Read JSON messages from stdin, dispatch, write responses to protocolOut + ``` + - Read lines from stdin, parse as JSON, dispatch by `type` field + - Write JSON response + newline to protocol stdout + - Handle errors gracefully (catch, send error message, don't crash) + +- [ ] **Must dispatch all message types** from the protocol (matching Plan 1a's + `ToEngine` enum exactly): + - `init` → load engine, call `engine.init(quartoAPI)`, call `engine.launch(context)`, return `ready` + - `claimsLanguage` / `claimsFile` → call discovery methods on loaded engine + - `markdownForFile` → call `instance.markdownForFile(file)` (non-QMD files only) + - `partitionedMarkdown` → call `instance.partitionedMarkdown(file, format?)` if + implemented; else fallback to `partition(markdownForFile(file).value)` + - `execute` → call `instance.target()` if implemented (harness-internal), + then construct `ExecutionTarget` from target result or `TsExecuteOptions` fields + (source_path, input text wrapped as MappedString, pre-extracted metadata), + construct `Format` from `TsFormatInfo`, call `instance.execute(options)` + - `filterFormat` → call `instance.filterFormat(source, options, format)` if implemented + - `executeTargetSkipped` → call `instance.executeTargetSkipped(target, format)` if implemented + - `dependencies` → call `instance.dependencies(options)` + - `postprocess` → call `instance.postprocess(options)` + - `postRender` → call `instance.postRender(file)` if implemented + - `canKeepSource` → call `instance.canKeepSource(target)` if implemented + - `intermediateFiles` → call `instance.intermediateFiles(input)` if implemented + - `shutdown` → clean up, exit + +- [ ] **`target()` is harness-internal**, not a protocol message. 
Before + calling `execute()`, the harness checks if the engine implements + `target()`. If so, it calls it with the reconstructed MappedString, and + uses the returned `ExecutionTarget` (including the opaque `data` cookie + like Jupyter's kernelspec). If not, the harness constructs + `ExecutionTarget` from `TsExecuteOptions` fields. Entirely Deno-side — + q2 never sees target() results. + +- [ ] **`partitionedMarkdown` dispatch** — dispatched when q2 sends the + `PartitionedMarkdown` message. If the engine implements it, call it. + If not, fall back to `partition(markdownForFile(file).value)` — calls + `markdownForFile` first (handles percent/spin conversion), then partitions. + +- [ ] For the `execute` dispatch, the harness constructs `Format` from + `TsFormatInfo` and bridges q2's data to the shapes the engine expects. + +- [ ] For optional methods (`filterFormat`, `executeTargetSkipped`, `canKeepSource`, + `intermediateFiles`, `postRender`, `run`): if the engine doesn't implement them, + return sensible defaults (pass-through format, true, empty list, void). + +### Phase 3: Supporting modules + +- [ ] Create `src/deno-host.ts` — the `PlatformHost` implementation used by + `@quarto/api` factory exports: + ```typescript + import type { PlatformHost } from "@quarto/api/platform"; + export const denoHost: PlatformHost = { + fs: { + readTextFileSync: Deno.readTextFileSync, + writeFileSync: (p, c) => Deno.writeFileSync(p, + typeof c === "string" ? 
new TextEncoder().encode(c) : c), + exists: (p) => { try { Deno.statSync(p); return true; } catch { return false; } }, + }, + process: { + exec: async (cmd, args, opts) => + await new Deno.Command(cmd, { args, ...opts }).output(), + }, + realPath: Deno.realPathSync, + isInteractive: Deno.stdin.isTerminal(), + isCI: !!Deno.env.get("CI"), + }; + ``` + +- [ ] Create `src/quarto-api.ts` — stub implementation: + - Build a `QuartoAPI` object from `EngineHostContext` as a plain nested + record (no registry pattern — Quarto 1's `QuartoAPIRegistry` and + `register.ts` side-effect module are deliberately not ported). + - For now, return stubs for every namespace that throw "not yet implemented". + The real implementations live in `@quarto/api` and are wired in by + Plans 2 and 3 (which replace these stubs with real factories threaded + through `denoHost`). + - Both `quarto.text` and `quarto.mappedString` namespaces exist on the + surface (Q1 compat); their implementations will later both pull from + `@quarto/api/text`. + +- [ ] Create `src/mapped-source.ts` — MappedString rehydration from + `TsSourceMapEntry[]`. This is the q2-specific piece (not in `@quarto/api` + itself) because the `source_map` crosses the protocol boundary as data + rather than in-memory references. + + **Rust side flattens before sending:** + - `Original { file_id, start_offset }` → resolve FileId to path via SourceContext + - `Substring` → walk parent chain to Original, compute absolute file offset + - `FilterProvenance` → emit with empty `file` string (sentinel, unmappable) + - Nested `Concat` → flatten recursively + File IDs are resolved to path strings on the Rust side since the Deno + process doesn't have SourceContext. + + **Deno-side reconstruction:** + 1. For each unique file in `source_map`, lazily read the file via + `denoHost.fs.readTextFileSync` and create a base `MappedString` with + `.fileName` set (cached per file for identity + single read). + 2. 
The main MappedString's `.map(index)` binary-searches the sorted + entries to find which piece contains the index, computes the offset + in the original file (`piece.fileOffset + (index - piece.start)`), + and returns `{ index: offset, originalString: baseForFile }`. + 3. For `closest=true` on an unmappable range (empty file sentinel), + scan to the nearest entry with a valid file mapping. + 4. `splitLines` and `indexToLineCol` are pure TS utilities that + operate on this MappedString — no special protocol support needed. + 5. This gives character-level accuracy — engines like Julia that + call `line.map(0, true)` in `buildSourceRanges()` get correct + original file + position, even through include boundaries. + +- [ ] **MappedString serialization for `markdownForFile` (Deno → Rust):** + When an engine converts a non-QMD file to QMD via `markdownForFile`, the + result is a `MappedString` with provenance back to the original file. + The harness serializes this mapping by walking the output text, calling + `.map()` to find contiguous ranges mapping to the same file with sequential + offsets, and emitting `TsSourceMapEntry` values. The Rust side converts + these entries to `SourceInfo::Concat` and attaches it to the AST parsed + from the generated QMD, enabling error positions in the original `.jl`/`.py` + file rather than in ephemeral generated text. + +- [ ] Create `src/engine-loader.ts`: + - Dynamically import the engine module: `await import(toFileUrl(path))` + - Validate it has a default export with `name`, `claimsLanguage`, `launch` + - Return the `ExecutionEngineDiscovery` object + +- [ ] Create `src/types.ts` — protocol message type definitions (must match + the Rust enums in Plan 1a exactly). + +### Phase 4: Bundle + CI + +- [ ] Build the bundle with `npm run build` and check `dist/engine-host-deno.js` into git. +- [ ] Add a CI check (or xtask lint) that verifies the checked-in bundle is up + to date with the sources. 
+ +## Design Notes + +### Stderr handling + +The subprocess's stderr is forwarded to q2's logging. The engine-host-deno +harness prefixes log lines with level markers so q2 can parse them: +``` +[INFO] Checking Julia installation... +[WARN] Julia server connection slow +[ERROR] Julia process crashed +``` + +Unprefixed stderr lines (from the engine itself or from Deno) are logged at INFO level. + +### Stdout/stderr contract + +**Stdout is exclusively for JSON protocol messages**, one per line. The +engine-host-deno harness writes responses there. If anything else writes to +stdout, the protocol is corrupted. + +- The harness overrides `console.log`/`console.info`/`console.warn`/`console.error` + to all write to **stderr** instead. This handles the common case of engines using + `console.log` for debugging. +- Engines should use `quarto.console.*` (which writes to stderr with level prefixes) + for diagnostics. +- We **cannot** prevent a determined engine from calling `Deno.stdout.writeSync()` + directly — this is documented as a contract violation that will break the protocol. +- On the Rust side (Plan 1a), if a line from stdout fails to parse as JSON, + report a clear error: "Engine wrote non-protocol output to stdout. Engine + extensions must use stderr for diagnostics." + +### Where is engine-host-deno.js at runtime? + +The engine-host-deno harness is bundled into a single `.js` file using **esbuild**. + +**Build pipeline:** +1. `ts-packages/quarto-engine-host-deno/esbuild.config.mjs` bundles `src/host.ts` → `dist/engine-host-deno.js` +2. The bundle is checked into git (like `quarto-system-runtime/js/dist/ejs-bundle.js`) +3. `include_str!("../../ts-packages/quarto-engine-host-deno/dist/engine-host-deno.js")` embeds it in the q2 binary +4. 
At runtime, write the embedded string to a temp file, run `deno run --allow-all ` + +The engine-host-deno bundle includes `@quarto/api` (all subpaths — text, +markdown, jupyter, format, path, system, console, crypto) and the harness +glue (host, deno-host, quarto-api, mapped-source, engine-loader) — a +single self-contained `.js` file. Only developers editing the TS harness +or `@quarto/api` code need to rebuild it. + +**Bundle size note:** The bundle may be large (200-500 KB estimated, depending on +`@quarto/api/jupyter` complexity). Currently q2 only embeds ~50 KB of JS via +`include_str!`. The engine-host-deno bundle is gated behind +`#[cfg(not(target_arch = "wasm32"))]` so WASM builds don't carry it. Flagged +as a possible future concern — if the bundle grows problematically, options +include a cargo feature flag to gate the embed, or loading from a known +filesystem path instead of embedding. For now, embedding is the simplest +approach and matches the existing `quarto-system-runtime` pattern. + +### Why a separate plan from 1a? + +The Rust-side infrastructure (Plan 1a) and the Deno-side harness (this plan) +are independent once the protocol schema is frozen. Splitting them makes +Plan 1a focus on the Rust compile-time / trait / subprocess-management +concerns, while this plan focuses on a TypeScript package with its own build +pipeline, esbuild config, and test setup. They can be worked on in parallel +if two people are available, and the separation naturally reflects the +`@quarto/engine-host-deno` / `@quarto/engine-host-wasm` split that the +`PlatformHost` abstraction enables later. 
+ +## Success Criteria + +- [ ] `@quarto/engine-host-deno` package exists with package.json, + esbuild.config.mjs, tsconfig +- [ ] Harness dispatches every protocol message type from Plan 1a's `ToEngine` + enum; optional engine methods fall back to sensible defaults when absent +- [ ] `target()` handled as harness-internal (never reaches the protocol) +- [ ] `partitionedMarkdown` falls back to `partition(markdownForFile(...))` + when the engine doesn't override it +- [ ] MappedString rehydration from `source_map` works end-to-end — a + `.map(index)` call returns `{ index, originalString }` pointing at the + correct file and offset even through include boundaries +- [ ] MappedString serialization for `markdownForFile` responses is + implemented and round-trips through the Rust side +- [ ] `denoHost: PlatformHost` in place; `quarto-api.ts` stub returns a + QuartoAPI object where every namespace throws "not yet implemented" + (replaced by Plans 2 and 3) +- [ ] Bundle builds cleanly with `npm run build`, produces + `dist/engine-host-deno.js`, and the bundle is checked into git +- [ ] CI check verifies the checked-in bundle is up to date diff --git a/claude-notes/plans/2026-04-16-plan1c-extension-integration.md b/claude-notes/plans/2026-04-16-plan1c-extension-integration.md new file mode 100644 index 000000000..4e4af5cdb --- /dev/null +++ b/claude-notes/plans/2026-04-16-plan1c-extension-integration.md @@ -0,0 +1,355 @@ +# Plan 1c: Extension Integration & End-to-End + +**Grand plan:** [2026-04-16-ts-engine-extensions-subprocess.md](2026-04-16-ts-engine-extensions-subprocess.md) +**Depends on:** Plan 1a (Rust core: protocol, subprocess, trait, `TsEngine`) +and Plan 1b (Deno harness: `@quarto/engine-host-deno`). Phases 1-2 of this +plan could technically start from Plan 1a alone (no subprocesses spawned), +but Phase 3 (echo engine E2E test) requires both. 
+**Blocks:** Plan 4 (Julia Validation)
+**Estimated sessions:** 1-2
+
+## Overview
+
+Wire the TS engine infrastructure from Plans 1a and 1b into the extension
+system and detection pipeline. Parse engine contributions from
+`_extension.yml`, build TS extensions with `deno bundle`, migrate the
+`EngineRegistry` into `StageContext`, rewrite engine detection with the
+4-phase algorithm, and validate end-to-end with an echo engine integration
+test.
+
+## Phase order
+
+Phase 1 → Phase 2 → Phase 3
+
+## Work Items
+
+### Phase 1: Extension discovery and build
+
+Parse `_extension.yml` for engine contributions, build TS extensions into bundled JS, and register `TsEngine` instances.
+
+Following Quarto 1's approach: engine extensions are **built** (bundled from TS to a single JS file) before execution. At runtime, q2 loads the bundled `.js` file — no import map or TS transpilation needed at execution time.
+
+- [ ] Add `engines` field to the `Contributes` struct in `crates/quarto-core/src/extension/types.rs`:
+ ```rust
+ /// Engine contributions: paths to TS engine modules or engine name
+ /// strings for reordering.
+ pub engines: Vec<EngineContribution>,
+ ```
+ And define:
+ ```rust
+ /// An engine contributed by an extension.
+ #[derive(Debug, Clone)]
+ pub enum EngineContribution {
+ /// An external engine module (.ts source or .js bundle).
+ /// Absolute path (resolved during read_extension).
+ External {
+ path: PathBuf,
+ /// Static hints: languages this engine might claim (e.g., ["julia"]).
+ /// Used to skip launching the Deno subprocess when no code blocks
+ /// match. Empty = no hint, always consult engine dynamically.
+ languages: Vec<String>,
+ /// Static hints: file extensions this engine might claim (e.g., [".jl"]).
+ /// Used to skip launching the subprocess when the input file doesn't
+ /// match. Empty = no hint, always consult engine dynamically. 
+ file_extensions: Vec<String>,
+ },
+ /// A bare engine name string — reordering hint that moves a
+ previously registered engine to higher priority.
+ Reorder { name: String },
+ }
+ ```
+ This extends Quarto 1's schema: `contributes.engines` accepts both
+ objects with a `path` property (creating new engines) and bare strings
+ (reordering hints). The `languages` and `file-extensions` fields are
+ q2 additions — optional static hints that let q2 skip launching the
+ Deno subprocess when the engine is clearly irrelevant (e.g., a project
+ with only `{python}` blocks doesn't need to launch the Julia engine).
+ If omitted, q2 falls back to always consulting the engine dynamically.
+
+- [ ] Add `engines` parsing in `parse_contributes()` in `crates/quarto-core/src/extension/read.rs`:
+ - Handle array of strings (reordering hints → `EngineContribution::Reorder`)
+ and objects with `path` key (resolve to absolute paths relative to ext_dir
+ → `EngineContribution::External`)
+ - For object entries, also read optional `languages` (array of strings) and
+ `file-extensions` (array of strings). These are static hints for pre-filtering.
+ - Include `engines` in the "at least one sub-field" validation check
+ - This supersedes Phase 8 of the extensions grand plan
+ (`claude-notes/plans/2026-03-16-extensions-grand-plan.md`)
+
+- [ ] Define extension YAML schema for engines, extending Quarto 1's schema:
+ ```yaml
+ contributes:
+ engines:
+ - path: julia-engine.js # required: path to bundled JS
+ languages: ["julia"] # optional: languages this engine might claim
+ file-extensions: [".jl"] # optional: file extensions this engine might claim
+ - jupyter # string form: reordering hint
+ ```
+ **Quarto 1 reference:** The extension schema (in `src/resources/schema/extension.yml`)
+ defines engines as an array of either strings (engine names for reordering) or
+ objects with a `path` property. Both forms are allowed in both `_extension.yml`
+ and `_quarto.yml` (identical schema). 
+ + **q2 extension:** The `languages` and `file-extensions` fields are new to q2. + They are conservative supersets — they list what the engine *might* claim. + The dynamic `claimsLanguage`/`claimsFile` functions in the TS engine are the + precise check. Static hints avoid launching the Deno subprocess when the engine + is clearly irrelevant. Extension authors are incentivized to declare them for + faster project scans. If omitted, the subprocess is always consulted. + + The Julia engine's `_extension.yml` uses `- path: julia-engine.js` (pointing to + the pre-built bundle). The engine's name comes from the module's `name` property + at runtime, not from the YAML. + + In q2, the `path` can point to either the `.ts` source (build step produces `.js`) or a pre-built `.js` bundle. Discovery queries (claimsLanguage, claimsFile) are handled dynamically by the subprocess. +- [ ] Implement engine extension build step: + - Provide an import map (`resources/extension-build/import-map.json`) that resolves: + - `@quarto/types` → our type definitions (`.d.ts`, erased during bundling) + - `"path"` → `jsr:@std/path` + - `"fs/exists"` → `jsr:@std/fs/exists` + - `"encoding/base64"` → `jsr:@std/encoding/base64` + - Provide a `deno.json` config pointing to the import map + - Use `deno bundle --config= ` to produce a single `.js` file + - Output the bundle to `_extensions/{name}/{stem}.js` + - This mirrors Quarto 1's `quarto call build-ts-extension` command +- [ ] Implement a `quarto build-ts-extension` subcommand (or integrate into existing build pipeline). CLI subcommands are defined using `clap` in `crates/quarto/src/main.rs` — add a new variant to the `Commands` enum, create a handler module in `crates/quarto/src/commands/`, and add the match arm in `main()`. +- [ ] Scan `_extensions/` for engine contributions during project initialization. 
+ **Quarto 1 reference:** `resolveEngineExtensions()` in `src/project/project-context.ts` discovers extensions with `contributes.engines`, merges them into `projectConfig.engines`. Then `resolveEngines()` in `src/execute/engine.ts` imports and registers them. +- [ ] For each discovered engine: + 1. Check if a bundled `.js` exists (built output) + 2. If not, error with a clear message: "Engine extension '{name}' has no + bundled .js file. Run `quarto build-ts-extension` in the extension + directory to build it." No auto-building — matching Quarto 1's contract + where extension authors build/bundle and check in the .js artifact. + 3. Create a `TsEngine` instance pointing to the bundled `.js` + 4. Register it in the `EngineRegistry` +- [ ] Support `_quarto.yml` `engines:` list for ordering. Following Quarto 1's model: + 1. Extension-contributed engines are appended to `projectConfig.engines` + 2. Engines listed explicitly in `_quarto.yml` `engines:` come first (higher priority on ties) + 3. Standard engines (knitr, jupyter, markdown) follow + 4. This ordering affects `claims_language` tie-breaking (first engine with highest score wins) +- [ ] Update engine detection to recognize extension-provided engine names +- [ ] Support `engine: julia` in document YAML triggering the extension engine +- [ ] Write test: fixture extension directory → build → engine registered and detectable +- [ ] Write test: `_quarto.yml` `engines:` list controls ordering + +### Phase 2: Engine detection rewrite + registry migration + +Rewrite engine detection to use the 4-phase algorithm, move the `EngineRegistry` +from `EngineExecutionStage` into `StageContext`, and restructure the pipeline +entry point so that `claimsFile` (Phase 1) runs before `ParseDocument`. + +**Current state:** `detection.rs` only checks YAML metadata (explicit `engine:` key +and engine-name top-level keys). No language-based detection exists — the file has a +"Future Enhancements" comment. 
The `EngineRegistry` is created inside +`EngineExecutionStage::new()` with only built-in engines, and +`EngineExecutionStage.run()` takes `&self` so can't mutate the registry. + +**Quarto 1 reference:** `fileExecutionEngine()` in `src/execute/engine.ts` (lines +302-351) and `markdownExecutionEngine()` (lines 146-211) implement the 4-phase algorithm. + +**Modernization:** In Quarto 1, Jupyter explicitly claimed "julia" via `claimsLanguage`, +creating a priority conflict with the Julia engine extension (both returned `true` = +priority 1, winner depended on registration order). In q2, Jupyter does NOT claim +"julia" — that's the Julia extension's job. Everything else matches Quarto 1: Jupyter +claims no languages via Phase 3 (Python and other languages reach Jupyter via the +Phase 4 fallback, same as Quarto 1), and Phase 4 is preserved because Jupyter is a +universal kernel executor. + +**Pre-parse engine detection:** Phase 1 (`claimsFile`) must run BEFORE +`ParseDocument`, because if an engine claims a non-QMD file (e.g., `.jl` +percent script), the engine must convert it to QMD before pampa can parse it. +The flow is: + +``` +Input file + │ + ├─ claimsFile: engine claims it (non-QMD) ─→ markdownForFile ─→ QMD text + │ │ + └─ claimsFile: no engine claims it ─────────────────────────────────┤ + ▼ + ParseDocument + │ + (rest of pipeline) +``` + +For QMD files, no engine claims via `claimsFile` (`.qmd` is not a +percent/spin format), so parsing proceeds directly. Engine detection +continues later via Phases 2-4 (YAML, language scan, fallback) which +operate on the parsed AST. + +For non-QMD files (`.jl`, `.py`, `.r`, `.ipynb`), an engine claims the +file, provides QMD text via `markdownForFile`, and that text enters the +pipeline. For TS engines, this requires the Deno subprocess to be running +— it's lazily spawned on first `claimsFile` query. 
+
+- [ ] **Move `EngineRegistry` from `EngineExecutionStage` into `StageContext`.**
+ Currently `EngineExecutionStage` owns the registry (created with built-ins only in
+ `new()`). Move it to `StageContext` where it's built during `StageContext::new()`:
+ 1. Start with `EngineRegistry::new()` (built-in engines)
+ 2. Scan `ctx.extensions` for engine contributions (`contributes.engines`)
+ 3. For each `EngineContribution::External`, create a `TsEngine` and `registry.register()` it
+ 4. For each `EngineContribution::Reorder`, add to the engine ordering list
+ 5. Apply ordering: user-specified engines first, then remaining registered engines
+ 6. Store as `ctx.registry: EngineRegistry`
+ `EngineExecutionStage` becomes stateless — its `run()` reads `ctx.registry`.
+ Remove the `registry` field from `EngineExecutionStage`; the `with_registry()`
+ test constructor is replaced by tests that build a `StageContext` with a custom registry.
+
+- [ ] **Add `claimed_engine_name: Option<String>` to `StageContext`.**
+ Set by the pre-parse stage (below) when an engine claims a file via `claimsFile`.
+ Read by `EngineExecutionStage` — if set, look up by name in registry, skip
+ Phases 2-4 detection.
+
+- [ ] **Create `EngineClaimsFileStage`** — a new `LoadedSource → LoadedSource` pipeline
+ stage inserted before `ParseDocumentStage` in `build_html_pipeline_stages()`.
+ This stage:
+ 1. Gets the file extension from `LoadedSource.path`
+ 2. For each engine in `ctx.registry` (in order), calls `claims_file(file, ext)`
+ 3. If an engine claims the file, calls `markdown_for_file(file)` to get QMD text
+ 4. Replaces `LoadedSource.content` with the QMD bytes and `source_type` with `Qmd`
+ 5. Stores `ctx.claimed_engine_name = Some(engine.name().to_string())`
+ 6. Converts the returned `TsMappedStringWithMap.source_map` entries to a
+ `SourceInfo::Concat` for source provenance tracking
+ 7. 
If no engine claims the file, passes through unchanged (the common case for `.qmd`) + For TS engines, this lazily spawns the Deno subprocess on first `claimsFile` query. + **WASM note:** A future plan will need to include this stage in the WASM pipeline + (built-in engines may eventually claim `.ipynb` etc. without Deno). + +- [ ] **Remove the `KNOWN_ENGINES` constant** from `detection.rs`. Currently hardcoded + as `["markdown", "knitr", "jupyter"]`. With extension engines, the set of known + engines is dynamic — it's whatever's in the registry. Replace any usage of + `KNOWN_ENGINES` (currently used in Phase 2 detection for top-level YAML key scanning) + with a query against the registry's engine names: `registry.engine_names()`. + +- [ ] Implement 4-phase detection (new function or refactor of `detect_engine()`). + New signature: `detect_engine(metadata, registry, ast, claimed: Option<&str>) → DetectedEngine` + (takes the registry, parsed AST, and optional pre-claimed engine name). + If `claimed` is `Some(name)`, skip all phases and look up directly by name (set by + `EngineClaimsFileStage` via `ctx.claimed_engine_name`). + 1. **Phase 1 — File extension claims**: For each engine in registry order, call + `claims_file(file, ext)`. First engine to claim wins. Used for `.ipynb` → jupyter, + `.rmd` → knitr. (Note: for the pre-parse flow, this already ran in + `EngineClaimsFileStage` and the result is in `claimed`. This phase is still + needed for cases where the file extension maps to a built-in engine that + doesn't need `markdownForFile`, e.g., `.ipynb` → jupyter.) + 2. **Phase 2 — YAML declaration**: Check explicit `engine:` key in frontmatter + (existing logic). Also check engine-name top-level keys — scan + `registry.engine_names()` instead of `KNOWN_ENGINES`. Skip phases 3-4. + 3. **Phase 3 — Language scanning**: Extract languages + first classes from code blocks + using the **parsed AST** (not regex — q2 has pampa for this). 
For each language, + call each engine's `claims_language(language, first_class)`. Highest `Option` + score wins. Engine iteration order breaks ties (user-specified engines first). + 4. **Phase 4 — Fallback**: If unclaimed computational languages exist (not handler + languages like `ojs`), default to Jupyter (it may have a kernel). If no + computational code blocks at all, default to markdown engine. +- [ ] For language extraction from AST: use pampa's existing parsing to get code block + languages and their classes, rather than regex. Quarto 1 uses + `languagesWithClasses()` regex on raw markdown; we should use the parsed + tree-sitter AST instead. +- [ ] `claimsFile` results are NOT cacheable — implementations may inspect file content + (e.g., Julia engine reads the file to check for percent script `# %%` markers). + Cache `claimsLanguage` results per engine per `(language, first_class)` pair. +- [ ] When a document has an explicit `engine: julia` in YAML, skip discovery entirely + — just look up the engine by name in the registry. This is the common case and + requires zero subprocess calls. +- [ ] Write test: engine claims "julia" language, document with `{julia}` blocks selects it +- [ ] Write test: explicit `engine: julia` in YAML skips discovery, resolves directly +- [ ] Write test: priority scoring — higher score wins over lower score +- [ ] Write test: unclaimed computational language → falls through to Jupyter (Phase 4 fallback) +- [ ] Write test: no code blocks → falls through to markdown engine +- [ ] Write test: extension engine registered in context, discoverable by name + +### Phase 3: Echo engine integration test + +End-to-end test with a minimal TypeScript engine. + +**Dependency note:** The echo engine imports types from `@quarto/types`. If Plan 2 +Phase 2D hasn't defined these yet, create a minimal type stub inline in the echo +engine file (just the interfaces it needs: `ExecutionEngineDiscovery`, +`ExecutionEngineInstance`, `QuartoAPI`). 
These can be replaced with proper imports later. + +- [ ] Create test fixture `tests/fixtures/extensions/echo-engine/`: + ``` + _extension.yml + src/echo-engine.ts + ``` +- [ ] `echo-engine.ts` — claims "echo" language, returns input with markers: + ```typescript + const echoEngine: ExecutionEngineDiscovery = { + name: "echo", + claimsLanguage: (lang) => lang === "echo", + launch: (ctx) => ({ + name: "echo", + canFreeze: false, + execute: async (opts) => ({ + engine: "echo", + markdown: opts.target.markdown.value.replace( + /```\{echo\}[\s\S]*?```/g, + "**ECHO_EXECUTED**" + ), + supporting: [], + filters: [], + }), + // ... minimal stubs for other methods + }), + }; + export default echoEngine; + ``` +- [ ] Write Rust integration test: + 1. Set up project with echo engine extension + 2. Render a .qmd with `{echo}` code blocks using `cargo run -- render ` (the `quarto` crate is the main CLI binary). Alternatively, write a Rust test that programmatically invokes the rendering pipeline — check existing tests in `crates/quarto/tests/` for patterns. + 3. Verify output contains "ECHO_EXECUTED" +- [ ] This test validates the full pipeline: discovery → subprocess spawn → protocol → execution → result + +## Design Notes + +### Extension build model + +Following Quarto 1's two-step approach: +1. **Build time:** `deno bundle --config= ` bundles the TS engine extension into a single `.js` file. An import map resolves `@quarto/types` (erased as type-only), Deno std lib imports, etc. All dependencies are inlined. `deno bundle` is a stable Deno feature (reintroduced in Deno 2.4, permanently supported; uses esbuild under the hood). +2. **Runtime:** The Deno subprocess loads the bundled `.js` file via dynamic `import()`. No import map or TS transpilation needed — everything is already resolved and bundled. 
+
+Note: The **engine-host harness** is built with esbuild (matching existing q2 patterns), while **engine extensions** are built with `deno bundle` (matching Quarto 1's extension build model and handling Deno-specific imports like `jsr:` specifiers). These are different build steps for different artifacts.
+
+This means the Deno subprocess invocation is simple:
+```bash
+deno run --allow-all <bundled-engine.js>
+```
+
+No `--import-map` flag needed at runtime.
+
+## Future Work: Built-in engine percent/spin script support
+
+The pre-parse `claimsFile` → `markdownForFile` flow (Phase 2 above) is
+designed for TS engine extensions but also applies to built-in engines.
+Currently, q2's built-in engines don't implement `claims_file` or
+`markdown_for_file` — they only handle `.qmd` input.
+
+Adding non-QMD file support to built-in engines requires implementing
+the trait methods on each engine:
+
+- **Jupyter**: `claims_file(".py") → true`, `claims_file(".jl") → true`,
+ `markdown_for_file` with a Rust percent-script converter (port of
+ Quarto 1's `markdownFromJupyterPercentScript`)
+- **Knitr**: `claims_file(".r") → true` (for spin scripts),
+ `markdown_for_file` invoking R's `knitr::spin()` via the R subprocess
+
+No pipeline changes needed — the architecture from Phase 2 supports it.
+This is out of scope for this plan (validation target is `.qmd` files)
+but is a natural follow-on. 
+ +## Success Criteria + +- [ ] Extension discovery finds engine extensions in `_extensions/` +- [ ] Both string (reordering) and object (new engine) forms parsed from `contributes.engines` +- [ ] `_quarto.yml` `engines:` list controls engine ordering +- [ ] `EngineRegistry` lives in `StageContext`, populated with extension engines +- [ ] `KNOWN_ENGINES` constant removed; detection uses registry dynamically +- [ ] `EngineClaimsFileStage` runs before `ParseDocumentStage`, claims non-QMD files +- [ ] `claimed_engine_name` propagates from pre-parse stage to `EngineExecutionStage` +- [ ] 4-phase engine detection works: file extension → YAML → language scan → Jupyter fallback (unclaimed langs) / markdown (no code) +- [ ] Echo engine integration test passes end-to-end +- [ ] Tests requiring Deno are skipped if Deno is absent (runtime `has_deno()` + check with early return, matching the pandoc test pattern) +- [ ] All existing tests pass (no regressions) diff --git a/claude-notes/plans/2026-04-16-quarto-jupyter.md b/claude-notes/plans/2026-04-16-quarto-jupyter.md new file mode 100644 index 000000000..9330ed08f --- /dev/null +++ b/claude-notes/plans/2026-04-16-quarto-jupyter.md @@ -0,0 +1,418 @@ +# Plan 3: @quarto/api/jupyter + +**Grand plan:** [2026-04-16-ts-engine-extensions-subprocess.md](2026-04-16-ts-engine-extensions-subprocess.md) +**Depends on:** Plan 2 Phase 2A (package skeleton). Phases 3A-3D and 3F otherwise independent. Phase 3E (wiring into engine-host) requires Plan 1b to have created the `@quarto/engine-host-deno` package. +**Blocks:** Plan 4 (Julia Validation) +**Estimated sessions:** 2-3 + +## Overview + +Populate the `jupyter/` subpath of `@quarto/api` — a clean implementation of +Jupyter notebook → markdown conversion and related utilities. This is the +`quarto.jupyter` namespace of the QuartoAPI. + +The Julia engine calls 7 methods from this namespace. 
The core function +`toMarkdown()` is the single most complex piece of the entire engine +extension project (~1300 lines of logic), but it's conceptually +straightforward: walk notebook cells, format outputs as markdown, handle +figures and HTML preservation. + +**Reference:** The Quarto 1 implementations to study are in +`~/src/quarto-cli/src/core/jupyter/` (a separate repository — +quarto-dev/quarto-cli). Key files: `jupyter.ts` (main toMarkdown), +`display-data.ts`, `tags.ts`, `labels.ts`, `preserve.ts`, `widgets.ts`, +`types.ts`. + +## Package location + +All files under `ts-packages/quarto-api/src/jupyter/`. No separate +`package.json` — `jupyter/` is a subpath of the single `@quarto/api` +package created in Plan 2A. Consumers import via +`@quarto/api/jupyter`. + +## Platform dependencies + +Three `jupyter/` pieces touch the filesystem and therefore take a +`PlatformHost` (see Plan 2's `PlatformHost` section): + +| Function | Needs host for | +|---|---| +| `jupyterToMarkdown(nb, opts)` | Writing figure image files (base64 decode → `host.fs.writeFileSync`) | +| `isPercentScript(file, exts)` | Reading the file to check for `# %%` markers | +| `percentScriptToMarkdown(file)` | Reading the source file | + +The other three methods (`assets`, `resultIncludes`, `resultEngineDependencies`) +are pure — no host. Plus all the internal supporting modules +(`display-data`, `tags`, `labels`, `preserve`, `widgets`, `pandoc-id`, +`cell-options`) are pure. + +Public shape: `src/jupyter/index.ts` exports a single factory +`createJupyter(host: PlatformHost)` that returns the full namespace +(all six public methods). Host-free methods are still implemented as pure +exports internally — the factory just wraps them. This matches the +`createPath` / `createSystem` / `createMappedStringFromFile` pattern in +Plan 2: one entry point per subpath, consistent wiring in +`@quarto/engine-host-deno`. 
+ +## What the Julia engine calls + +| Method | What it does | Complexity | +|--------|-------------|------------| +| `toMarkdown(nb, opts)` | Convert Jupyter notebook → markdown string | **High** — the core function | +| `isPercentScript(file, exts)` | Check if file is a percent-format script | Low — check for `# %%` markers | +| `percentScriptToMarkdown(file)` | Convert percent script → markdown | Low — regex transformation | +| `assets(input, to)` | Compute asset directory paths for figures | Low — path computation | +| `resultIncludes(tempDir, deps)` | Extract pandoc includes from widget deps | Low — object transformation | +| `resultEngineDependencies(deps)` | Extract engine-specific deps | Low — pass-through | + +## Work Items + +### Phase 3A: Types and foundation + +- [ ] Confirm `@quarto/api` package skeleton from Plan 2A is in place. If + Plan 2A hasn't landed, create the minimal package scaffolding first + (`package.json`, `tsconfig.json`, `exports` map including `./jupyter`). + +- [ ] Create `src/jupyter/types.ts` — Jupyter notebook types: + ```typescript + interface JupyterNotebook { + nbformat: number; + nbformat_minor: number; + metadata: NotebookMetadata; + cells: JupyterCell[]; + } + interface JupyterCell { + cell_type: "code" | "markdown" | "raw"; + source: string | string[]; + metadata: CellMetadata; + outputs?: CellOutput[]; // code cells only + execution_count?: number | null; + } + interface CellOutput { + output_type: "stream" | "display_data" | "execute_result" | "error"; + // Fields vary by output_type + text?: string | string[]; + data?: MimeBundle; + name?: string; // "stdout" | "stderr" for stream + ename?: string; // for error + evalue?: string; + traceback?: string[]; + } + type MimeBundle = Record; + ``` + Reference: Quarto 1's `src/core/jupyter/types.ts` + +- [ ] Create `src/jupyter/constants.ts` — MIME type constants, cell option + keys, etc. Reference: Quarto 1's `src/config/constants.ts` (just the + subset we need). 
+ +### Phase 3B: Supporting modules + +Small, focused modules that `toMarkdown` depends on. Each is self-contained. + +- [ ] Create `src/jupyter/display-data.ts` — MIME bundle dispatch: + - `displayDataMimeType(output, options)` — select best MIME type from bundle + - `displayDataIsImage(output)`, `displayDataIsTextPlain(output)`, etc. + - MIME priority order: text/html > image/svg+xml > image/png > image/jpeg > text/markdown > text/latex > text/plain + - Reference: Quarto 1's `src/core/jupyter/display-data.ts` + - ~150 lines + +- [ ] Create `src/jupyter/tags.ts` — cell visibility logic: + - `hideCell(options)`, `hideCode(options)`, `hideOutput(options)`, `hideWarnings(options)` + - `includeCell(cell, options)`, `includeCode(cell, options)`, `includeOutput(cell, options)` + - Based on cell-level `echo`, `include`, `output`, `warning` options + - Reference: Quarto 1's `src/core/jupyter/tags.ts` + - ~100 lines + +- [ ] Create `src/jupyter/labels.ts` — cell label and caption handling: + - `cellLabel(cell)` — extract label from cell metadata or options + - `cellLabelClass(label)` — generate CSS class from label + - `resolveCaptions(cell)` — extract fig-cap, tbl-cap, etc. 
+  - Reference: Quarto 1's `src/core/jupyter/labels.ts`
+  - ~100 lines
+
+- [ ] Create `src/jupyter/preserve.ts` — HTML preservation:
+  - `removeAndPreserveHtml(output)` — replace raw HTML with placeholder UUIDs
+  - Returns `{ output: string, preserved: Record<string, string> }`
+  - Used to protect HTML from Pandoc's markdown processing
+  - Reference: Quarto 1's `src/core/jupyter/preserve.ts`
+  - ~80 lines
+
+- [ ] Create `src/jupyter/widgets.ts` — Jupyter widget dependency extraction:
+  - `widgetDependencies(outputs)` — find widget state in output MIME bundles
+  - `widgetDependencyIncludes(deps, tempDir)` — generate script tags for widgets
+  - Reference: Quarto 1's `src/core/jupyter/widgets.ts`
+  - ~100 lines
+
+- [ ] Create `src/jupyter/pandoc-id.ts` — identifier generation:
+  - `pandocAutoIdentifier(text)` — generate Pandoc-style IDs from heading text
+  - Pure string manipulation, no dependencies
+  - Reference: Quarto 1's `src/core/pandoc/pandoc-id.ts`
+  - Note: lives under `jupyter/` for now because jupyter is the only
+    consumer. If other consumers emerge, promote to a top-level `pandoc/`
+    subpath — cheap move, cheap rename.
+  - ~50 lines
+
+- [ ] Create `src/jupyter/cell-options.ts` — simplified cell options parsing:
+  - Parse YAML from code cell comments (`#| key: value` lines)
+  - Use `yaml` package directly (no schema validation)
+  - Extract cell-level execution options
+  - **Simplified from Quarto 1**: no schema validation, no tree-sitter
+  - ~100 lines
+
+### Phase 3C: Core toMarkdown function
+
+The main conversion function. Takes a `JupyterNotebook` and options, returns
+markdown string.
+
+- [ ] Create `src/jupyter/to-markdown.ts`:
+  ```typescript
+  export interface JupyterToMarkdownOptions {
+    language: string; // e.g., "julia", "python"
+    assets: JupyterAssets; // figure output paths
+    execute: CellExecuteOptions; // echo, include, output, warning defaults
+    keepHidden: boolean;
+    toHtml: boolean;
+    toLatex: boolean;
+    toMarkdown: boolean;
+    toIpynb: boolean;
+    toPresentation: boolean;
+    figFormat: string; // "png", "svg", "pdf", etc.
+    figDpi: number;
+    preserveCodeCellYaml?: boolean;
+  }
+
+  export interface JupyterToMarkdownResult {
+    cellOutputs: string[]; // markdown for each cell
+    pandoc: Record<string, unknown>;
+    htmlPreserve: Record<string, string>;
+    dependencies?: JupyterWidgetDependencies;
+  }
+
+  export function jupyterToMarkdown(
+    nb: JupyterNotebook,
+    options: JupyterToMarkdownOptions,
+  ): JupyterToMarkdownResult;
+  ```
+
+- [ ] Implement cell walking logic:
+  1. Iterate notebook cells
+  2. For each markdown cell: emit source as-is
+  3. For each code cell:
+     a. Check visibility (echo, include, output options via tags.ts)
+     b. Extract cell label and options
+     c. Emit code fence with language and options
+     d. Format each output (see below)
+     e. Handle figure outputs (write to disk, emit `![]()` reference)
+  4.
For each raw cell: emit with format marker + +- [ ] Implement output formatting: + - **stream output** (stdout/stderr): emit as text, strip ANSI codes + - **display_data / execute_result**: dispatch by MIME type (display-data.ts) + - `text/html` → emit as raw HTML block (with preservation) + - `image/png`, `image/jpeg` → decode base64, write to file, emit `![](path)` + - `image/svg+xml` → write to file, emit `![](path)` + - `text/plain` → emit as text output + - `text/latex` → emit as math block + - `text/markdown` → emit directly + - `application/json` → emit as code block + - **error output**: format traceback, strip ANSI codes + +- [ ] Implement figure handling: + - Write image data to `assets.figuresDir` via `host.fs.writeFileSync` + (base64 decode for PNG/JPEG, write as bytes; SVG written as text). + The host is captured via the `createJupyter(host)` factory closure — + `to-markdown.ts` itself takes `host` as a parameter on the internal + implementation function. + - Generate filename from cell label or counter + - Emit markdown image reference with optional caption, width/height + - Handle `fig-format` option (request specific format from kernel) + +- [ ] Implement HTML preservation: + - Protect HTML outputs from Pandoc processing + - Use UUID placeholders (preserve.ts) + - Return preservation map for post-processing + +- [ ] ANSI code handling: + - Strip ANSI escape codes from text outputs + - Simple regex replacement (not full ANSI→HTML conversion like Quarto 1's deno-dom approach) + - Can add HTML conversion later if needed + +- [ ] Reference: Quarto 1's `src/core/jupyter/jupyter.ts` function `jupyterToMarkdown` (~lines 380-700) + +### Phase 3D: Utility functions + +The simpler methods that the Julia engine also calls. 
+ +- [ ] Create `src/jupyter/percent-script.ts` — host-dependent (reads files): + - Internal functions take `host: PlatformHost` as first parameter: + ```typescript + export function isPercentScript(host: PlatformHost, file: string, exts?: string[]): boolean; + export function percentScriptToMarkdown(host: PlatformHost, file: string): string; + ``` + - `isPercentScript` — check extension + read file + look for `# %%` markers + - `percentScriptToMarkdown` — read file + convert percent-format to markdown: + - `# %%` → code cell boundaries + - `# %% [markdown]` → markdown cells + - Other content → code cells + - The public `createJupyter(host)` factory binds `host` so callers see the + natural 1-arg / 2-arg signatures. + - Reference: Quarto 1's `src/core/jupyter/percent.ts` + - ~80 lines + +- [ ] Create `src/jupyter/assets.ts`: + - `assets(input, to?)` — compute figure directory paths: + ```typescript + function assets(input: string, to?: string): JupyterAssets { + const stem = basename(input, extname(input)); + const baseDir = join(dirname(input), stem + "_files"); + const figDir = join(baseDir, figureDirForFormat(to)); + return { baseDir, figDir, supportingDir: baseDir }; + } + ``` + - ~30 lines + +- [ ] Create `src/jupyter/result-helpers.ts`: + - `resultIncludes(tempDir, deps?)` — extract pandoc includes from widget deps + - `resultEngineDependencies(deps?)` — pass-through or wrap engine deps + - ~40 lines + +- [ ] Create `src/jupyter/index.ts` — exports the `createJupyter(host)` + factory plus all public types (`JupyterNotebook`, `JupyterCell`, + `JupyterToMarkdownOptions`, `JupyterToMarkdownResult`, …). Internal + functions remain accessible via relative paths inside `@quarto/api` for + tests and callers that want to pass their own host. 
+ + ```typescript + // src/jupyter/index.ts + import type { PlatformHost } from "../platform.ts"; + import { jupyterToMarkdown as _toMarkdown } from "./to-markdown.ts"; + import { isPercentScript as _isPercent, percentScriptToMarkdown as _percentMd } + from "./percent-script.ts"; + import { assets } from "./assets.ts"; + import { resultIncludes, resultEngineDependencies } from "./result-helpers.ts"; + + export function createJupyter(host: PlatformHost) { + return { + toMarkdown: (nb, opts) => _toMarkdown(host, nb, opts), + isPercentScript: (file, exts) => _isPercent(host, file, exts), + percentScriptToMarkdown: (file) => _percentMd(host, file), + assets, // pure, no host + resultIncludes, // pure, no host + resultEngineDependencies, // pure, no host + }; + } + export type { /* public types */ }; + ``` + +### Phase 3E: Integration with engine-host + +Wire `@quarto/api/jupyter` into the `quarto.jupyter` namespace in +`@quarto/engine-host-deno`. + +- [ ] Update `@quarto/engine-host-deno/src/quarto-api.ts` to call the + `createJupyter` factory with the same `denoHost` used for the other + namespaces in Plan 2C: + ```typescript + import { createJupyter } from "@quarto/api/jupyter"; + import { denoHost } from "./deno-host.ts"; + + function buildJupyterNamespace(context: EngineHostContext) { + return createJupyter(denoHost); + } + ``` + The factory returns the six-method namespace object directly; the + engine-host layer just forwards it. Any context-dependent wrappers + (e.g. resolving `context.tempDir` for `resultIncludes`) can be + composed around the factory output. +- [ ] `@quarto/api` is already a dependency of `@quarto/engine-host-deno` + (added in Plan 2C) — no new dependency needed. + +### Phase 3F: Testing + +Check existing ts-packages for the test runner convention (likely Vitest). +Run `npm install` from the repo root if the package structure changed. 
+
+- [ ] Unit tests for each supporting module (display-data, tags, labels, preserve, widgets)
+- [ ] Unit tests for cell options parsing
+- [ ] Integration test: convert a simple notebook JSON (2 code cells, 1 markdown cell) → markdown
+- [ ] Integration test: notebook with image output → figure written to disk + markdown reference
+- [ ] Integration test: notebook with HTML output → preservation markers
+- [ ] Integration test: notebook with error output → formatted traceback
+- [ ] Test with a real `.ipynb` file (e.g., from Jupyter's test fixtures)
+
+## Design Notes
+
+### Simplified vs. Quarto 1
+
+Key simplifications in our rewrite:
+
+1. **No YAML schema validation** for cell options — just parse with the `yaml` package
+2. **No deno-dom** for ANSI→HTML — just strip ANSI codes (can add conversion later)
+3. **No tree-sitter** — cell options parsing uses regex/yaml
+4. **No MappedString provenance** — just plain strings with filenames
+5. **Flattened options types** — `JupyterToMarkdownOptions` instead of pulling in `ExecuteOptions` → `Format` → `ProjectContext` → ...
+
+These simplifications mean ~1300 lines of clean code vs. ~5000+ lines of
+tangled Quarto 1 code with 30+ transitive dependencies.
+
+### Dependency on `@quarto/api/text` or `@quarto/api/markdown`
+
+If `jupyter/` needs text helpers (e.g., `lines`, `pandocAutoIdentifier`) or
+markdown parsing, it imports them directly from the sibling subpath (e.g.,
+`import { lines } from "../text/text.ts"`). Since both live in the same
+`@quarto/api` package, there's no cross-package version coordination — they
+are released together. If internal relative imports become noisy, we can
+use the package's own exports map inside the package (`@quarto/api/text`).
+ +### Accuracy target + +The output should match Quarto 1's for the common cases: +- Code cells with text, image, and HTML outputs +- Cell visibility options (echo, include, output) +- Figure file generation and referencing +- HTML preservation + +Edge cases where we may differ: +- Rare MIME types (vdom, plotly — add support as needed) +- Complex widget dependency chains +- ANSI color preservation in output (we strip, Quarto 1 converts to HTML spans) + +### Portability constraints + +Same rules as Plan 2 (see "Portability constraints" in that plan): + +1. No q2-specific imports from `jupyter/`. +2. **No `Deno.*` or `node:*` references inside `jupyter/`.** All I/O goes + through the `PlatformHost` passed to `createJupyter(host)`. The three + FS-touching functions (`toMarkdown`'s figure writes, `isPercentScript`, + `percentScriptToMarkdown`) call `host.fs.*` explicitly; the rest of + `jupyter/` is pure. +3. No dependency on `@quarto/engine-host-deno` (dependency runs the other direction). +4. Same package can later run under `@quarto/engine-host-wasm` with a + VFS-backed host — no changes to `jupyter/` required, only a different + `PlatformHost` implementation plugged in at the engine-host layer. + +### Future: Quarto 1 adoption + +`@quarto/api/jupyter` is designed to be importable by Quarto 1, replacing: +- `src/core/jupyter/jupyter.ts` (the `jupyterToMarkdown` function) +- `src/core/jupyter/display-data.ts`, `tags.ts`, `labels.ts`, `preserve.ts`, `widgets.ts` +- Parts of `src/core/jupyter/jupyter-shared.ts` + +The API signatures are compatible. Quarto 1 would need to adapt its options +types to match our flattened `JupyterToMarkdownOptions`. 
+ +## Success Criteria + +- [ ] `@quarto/api/jupyter` populated with all 6 methods the Julia engine uses, + exposed via `createJupyter(host)` factory +- [ ] No `Deno.*` or `node:*` references inside `@quarto/api/jupyter` +- [ ] `toMarkdown` correctly converts notebooks with code, markdown, and raw cells +- [ ] Image outputs write files to disk (via `host.fs.writeFileSync`) and emit + correct markdown references +- [ ] HTML outputs use preservation markers +- [ ] Error outputs format tracebacks readably +- [ ] All tests pass (unit tests can pass a mock host with in-memory FS) +- [ ] Integrated into `@quarto/engine-host-deno`'s QuartoAPI via + `createJupyter(denoHost)` in `buildJupyterNamespace` diff --git a/claude-notes/plans/2026-04-16-quarto-markdown-and-api.md b/claude-notes/plans/2026-04-16-quarto-markdown-and-api.md new file mode 100644 index 000000000..ee34238b6 --- /dev/null +++ b/claude-notes/plans/2026-04-16-quarto-markdown-and-api.md @@ -0,0 +1,529 @@ +# Plan 2: @quarto/api (text, markdown, utilities) + QuartoAPI assembly + +**Grand plan:** [2026-04-16-ts-engine-extensions-subprocess.md](2026-04-16-ts-engine-extensions-subprocess.md) +**Depends on:** Phases 2A, 2B, 2D are independent. Phase 2C (wiring into engine-host) requires Plan 1b to have created the `@quarto/engine-host-deno` package. +**Blocks:** Plan 3 (@quarto/api/jupyter) is gated on Phase 2A (package skeleton). Plan 4 (Julia Validation) needs all of Plan 2. +**Estimated sessions:** 1-2 + +## Overview + +Create the `@quarto/api` TypeScript package and populate the `text/`, +`markdown/`, `format/`, `path/`, `system/`, `console/`, and `crypto/` +subpaths. Flesh out the QuartoAPI assembly in `@quarto/engine-host-deno` that +Plan 1a stubbed. + +This plan covers everything in the QuartoAPI surface except `quarto.jupyter` +(which lives under `@quarto/api/jupyter` and is the subject of Plan 3). + +## Package layout + +A single `@quarto/api` package with subpath exports. 
Everything lives under +`ts-packages/quarto-api/src/`: + +``` +text/ ← MappedString + text utilities +markdown/ ← extractYaml, partition, getLanguages, breakQuartoMd +format/ ← isHtmlCompatible, isLatexOutput, … +path/ ← dirAndStem, isQmdFile, toForwardSlashes, … +system/ ← execProcess, pandoc, tempContext, … +console/ ← info, warning, error, withSpinner +crypto/ ← md5Hash +jupyter/ ← Plan 3 + +platform.ts ← PlatformHost interface (see below) +``` + +One `package.json`, one version, one dep list, `exports` map for targeted imports. + +## Cross-environment portability: `PlatformHost` + +`@quarto/api` must run in two environments without modification: + +1. **Deno subprocess** (`@quarto/engine-host-deno`) — the harness built in + Plan 1a. Has full Deno APIs. +2. **q2's WASM runtime in hub-client** (future, not in this plan) — file I/O + goes through q2's VFS (`vfsReadFile`, `vfsAddFile`, …), no subprocesses, + no `Deno` global. + +To keep both targets viable, every I/O-touching submodule takes a +`PlatformHost` parameter instead of calling `Deno.*` directly. Pure submodules +(`markdown/`, most of `text/`, `format/`, `console/`, `crypto/`, most of +`jupyter/`) have no host dependency and work in any JS environment. 
+
+### `src/platform.ts`
+
+```typescript
+export interface PlatformHost {
+  fs: {
+    readTextFileSync(path: string): string;
+    writeFileSync(path: string, content: string | Uint8Array): void;
+    exists(path: string): boolean;
+  };
+  process?: { // undefined → execProcess throws "not supported"
+    exec(cmd: string, args: string[], opts?: ExecOptions): Promise<ExecResult>;
+  };
+  realPath?(path: string): string; // undefined → absolute() returns path as-is
+  isInteractive: boolean;
+  isCI: boolean;
+}
+```
+
+### Submodules that take a host (factories)
+
+| Submodule | Export shape |
+|---|---|
+| `text/mapped-from-file.ts` | `createMappedStringFromFile(host)` returning `(path) => MappedString` |
+| `path/index.ts` | `createPath(host)` for `absolute()`; pure path-string helpers remain direct exports |
+| `system/index.ts` | `createSystem(host)` returning the full system namespace |
+
+### Submodules with no host (pure exports)
+
+`markdown/`, `text/mapped.ts` + `text/text.ts` + `text/ranged.ts` + `text/binary-search.ts`,
+`format/`, `console/`, `crypto/`, `jupyter/` (the conversion logic; figure
+writes from `jupyter/assets.ts` go through the host).
+
+### What this plan implements
+
+- [ ] `src/platform.ts` — the `PlatformHost` interface, no implementations.
+- [ ] All factory exports listed above.
+- [ ] A `denoHost: PlatformHost` in `@quarto/engine-host-deno`, wiring
+  `Deno.readTextFileSync`, `Deno.Command`, etc.
+
+### What this plan does NOT implement
+
+- A WASM-side host. That's a future piece of work and needs its own package
+  (working name: `@quarto/engine-host-wasm`). Design invariant for this plan:
+  nothing in `@quarto/api` prevents that from being written later.
+- A decision about *how* TS engine extensions run in the browser (Web
+  Worker? Sandbox? Different mechanism entirely?). See
+  `crates/quarto-system-runtime` and the deepwiki analysis
+  (2026-04-22 discussion) for context. The `PlatformHost` abstraction is
+  necessary but not sufficient for browser hosting.
+ +## Work Items + +### Phase 2A: Package skeleton + @quarto/api/markdown + +- [ ] Create `ts-packages/quarto-api/package.json`: + ```json + { + "name": "@quarto/api", + "version": "0.1.0", + "type": "module", + "exports": { + ".": "./src/index.ts", + "./text": "./src/text/index.ts", + "./markdown": "./src/markdown/index.ts", + "./jupyter": "./src/jupyter/index.ts", + "./format": "./src/format/index.ts", + "./path": "./src/path/index.ts", + "./system": "./src/system/index.ts", + "./console": "./src/console/index.ts", + "./crypto": "./src/crypto/index.ts" + }, + "dependencies": { "yaml": "^2.0.0" } + } + ``` + Run `npm install` from the repo root after creating the package. + +- [ ] Create `ts-packages/quarto-api/tsconfig.json` matching the repo's existing + ts-packages conventions. + +- [ ] Create `src/index.ts` (optional aggregate re-export — convenience for + callers who want everything under one import). + +- [ ] Create `src/markdown/` — clean reimplementations of the markdown + utilities. These power `quarto.markdownRegex.*` on the API surface. 
+
+  - [ ] `src/markdown/extract-yaml.ts` — `extractYaml(markdown: string) → Metadata`:
+    - Find YAML front matter between `---` delimiters
+    - Parse with `yaml` package
+    - Support the `!expr` YAML tag (Quarto convention: treat as raw string)
+    - Handle edge cases: no front matter, empty front matter, malformed YAML
+    - Reference: Quarto 1's `readYamlFromMarkdown` in `src/core/yaml.ts`
+    - ~50 lines + tests
+
+  - [ ] `src/markdown/pandoc-attr.ts` — `pandocAttrParseText(text: string) → PandocAttr | null`:
+    - Parse Pandoc-style attributes on code blocks (`{#id .class key=value}`)
+    - Used by `partition.ts`
+    - Reference: Quarto 1's `pandocAttrParseText` in `src/core/pandoc/pandoc-partition.ts`
+
+  - [ ] `src/markdown/partition.ts` — `partition(markdown: string) → PartitionedMarkdown`:
+    - Split markdown into: yaml front matter, heading (first heading if present), body
+    - Uses `extractYaml` for the YAML part
+    - Reference: Quarto 1's `partitionMarkdown` in `src/core/pandoc/pandoc-partition.ts`
+    - ~200 lines + tests
+
+  - [ ] `src/markdown/languages.ts` — `getLanguages(markdown: string) → Set<string>`:
+    - Extract language specifiers from fenced code blocks via regex
+    - Match `` ```{language} `` patterns
+    - Pure regex, zero dependencies
+    - Reference: Quarto 1's `languagesInMarkdown` — literally a copy, it's self-contained
+    - ~30 lines + tests
+
+  - [ ] `src/markdown/break-quarto-md.ts` — `breakQuartoMd(markdown: string) → QuartoMdCell[]`:
+    - Split markdown into alternating code cells and markdown cells
+    - Parse cell options from YAML comments within code blocks
+    - **Simplified from Quarto 1**: use `yaml` package directly for cell
+      options, no schema validation, no tree-sitter
+    - Handle: fenced code blocks, shortcodes, raw blocks
+    - Reference: Quarto 1's `breakQuartoMd` in `src/core/lib/break-quarto-md.ts`
+    - Note: the Julia engine does NOT use this method, but the engine
+      template does and other engines will
+    - ~300 lines + tests
+
+  - [ ]
`src/markdown/index.ts` — barrel re-export. + +- [ ] Write unit tests for each function. Check existing ts-packages for the + test runner convention (likely Vitest, since that's what the Rust monorepo's + other ts-packages use). If nothing is set up yet, use Vitest and add a + `test` script to `package.json`. + +### Phase 2B: @quarto/api/text (including MappedString) + +In q2's design, `quarto.text` and `quarto.mappedString` are **two separate +QuartoAPI namespaces** (Q1 compat) powered by a **single underlying module**, +`@quarto/api/text`. Layout mirrors Q1's groupings in `@quarto/types/src/text.ts`. + +- [ ] `src/text/types.ts` — types only (matches Q1's + `@quarto/types/src/text.ts`): + ```typescript + export interface Range { start: number; end: number; } + export interface MappedString { + readonly value: string; + readonly fileName?: string; + readonly map: (index: number, closest?: boolean) => StringMapResult; + } + export type StringMapResult = { + index: number; + originalString: MappedString; + } | undefined; + export type EitherString = string | MappedString; + export interface RangedSubstring { substring: string; range: Range; } + export type StringChunk = string | Range | MappedString; + ``` + +- [ ] `src/text/binary-search.ts` — `glb(arr, value)` helper (copy from Q1's + `src/core/lib/binary-search.ts`, trivially small). + +- [ ] `src/text/ranged.ts` — `RangedSubstring`, `rangedLines` (internal, + used by `mapped.ts`). Copy from Q1's `src/core/lib/ranged-text.ts`. + +- [ ] `src/text/text.ts` — plain-string utilities that power `quarto.text.*`: + - `lines(text)` → `text.split("\n")` + - `trimEmptyLines(lines, trim)` → filter empty lines from start/end + - `lineBreakPositions(text)`, `indexToLineCol(text)`, `matchAll` (internal + helpers used by `mapped.ts`, also exposed) + +- [ ] `src/text/yaml-text.ts` — `asYamlText(metadata)` → `yaml.dump(metadata)`. 
+ +- [ ] `src/text/html-preserve.ts` — `postProcessRestorePreservedHtml(options)` + — replace preservation markers with original HTML. + +- [ ] `src/text/mapped.ts` — core `MappedString` implementation. Direct + port of Q1's `src/core/lib/mapped-text.ts` (~200 lines): + - `asMappedString(str, fileName?)` — base with identity `.map()` + - `mappedSubstring(source, start, end)` — shifted view that delegates to source + - `mappedConcat(strings)` — concatenation with binary-search `.map()` + - `mappedString(source, pieces, fileName?)` — sugar over the above + - `mappedLines(ms, keepNewLines?)`, `mappedNormalizeNewlines(ms)`, + `mappedIndexToLineCol(ms)` + +- [ ] `src/text/mapped-from-file.ts` — factory for FS-backed MappedString: + ```typescript + import type { PlatformHost } from "../platform.ts"; + export function createMappedStringFromFile(host: PlatformHost) { + return (path: string): MappedString => + asMappedString(host.fs.readTextFileSync(path), path); + } + ``` + The only FS-touching function in `text/` — isolating it behind the host + factory keeps the rest of `text/` (MappedString algebra, text utilities) + pure and portable. + +- [ ] `src/text/index.ts` — barrel re-export. + +- [ ] Write tests for the text utilities and the MappedString algebra + (including `.map()` composition through multiple `mappedConcat`/ + `mappedSubstring` layers). + +**Note on source-map rehydration:** The `fromSourceMap` function that +constructs a MappedString from `TsSourceMapEntry[]` byte ranges does +**not** live in `@quarto/api/text`. It is q2-specific (needed because +`source_map` crossed the protocol boundary) and lives in +`@quarto/engine-host-deno/src/mapped-source.ts`. It is built on top of the +primitives from `@quarto/api/text` (`asMappedString`, `mappedConcat`) and +maintains a base-per-file cache so all pieces sharing a source file share +one base `MappedString` object. See Plan 1b for the algorithm. 
+ +### Phase 2C: Remaining @quarto/api submodules + engine-host wiring + +Populate the remaining `@quarto/api` submodules, then flesh out the stub +`quarto-api.ts` in `@quarto/engine-host-deno` that Plan 1a created. + +**Construction model** — no registry pattern, and I/O runs through a +`PlatformHost` plugged in by the consumer: + +```typescript +// engine-host-deno/src/deno-host.ts +import type { PlatformHost } from "@quarto/api/platform"; +export const denoHost: PlatformHost = { + fs: { + readTextFileSync: Deno.readTextFileSync, + writeFileSync: (p, c) => Deno.writeFileSync(p, + typeof c === "string" ? new TextEncoder().encode(c) : c), + exists: (p) => { try { Deno.statSync(p); return true; } catch { return false; } }, + }, + process: { + exec: async (cmd, args, opts) => + await new Deno.Command(cmd, { args, ...opts }).output(), + }, + realPath: Deno.realPathSync, + isInteractive: Deno.stdin.isTerminal(), + isCI: !!Deno.env.get("CI"), +}; + +// engine-host-deno/src/quarto-api.ts +import { denoHost } from "./deno-host.ts"; +import * as text from "@quarto/api/text"; +import { createMappedStringFromFile } from "@quarto/api/text/mapped-from-file"; +import * as markdown from "@quarto/api/markdown"; +import * as format from "@quarto/api/format"; +import * as pathMod from "@quarto/api/path"; +import { createPath } from "@quarto/api/path"; +import { createSystem } from "@quarto/api/system"; +import * as quartoConsole from "@quarto/api/console"; +import * as crypto from "@quarto/api/crypto"; +// jupyter wired in by Plan 3 + +export function buildQuartoAPI(context: EngineHostContext): QuartoAPI { + const mappedStringFromFile = createMappedStringFromFile(denoHost); + const pathNamespace = { ...pathMod, ...createPath(denoHost) }; + const systemNamespace = createSystem(denoHost); + return { + text: buildTextNamespace(text), + mappedString: buildMappedStringNamespace(text, mappedStringFromFile), + markdownRegex: buildMarkdownRegexNamespace(markdown), + format: 
buildFormatNamespace(format, context), + path: buildPathNamespace(pathNamespace, context), + system: buildSystemNamespace(systemNamespace, context), + console: buildConsoleNamespace(quartoConsole), + crypto: buildCryptoNamespace(crypto), + jupyter: buildJupyterNamespace(...), // Plan 3 + }; +} +``` + +Direct construction, plain nested object. Quarto 1's +`QuartoAPIRegistry`/`register.ts` infrastructure is **not** being ported. +The same pattern in a future `@quarto/engine-host-wasm` replaces `denoHost` +with a VFS-backed host and leaves the rest of the assembly identical. + +#### src/format/ + +- [ ] `src/format/index.ts` — pure computation from `format.pandoc.to` string. + Each method accepts an optional `format` parameter (Q1 compat) and falls back + to a context-provided default: + ```typescript + export function isHtmlCompatible(format, defaultTo: string): boolean { + const to = format?.pandoc?.to ?? defaultTo; + return ["html", "html4", "html5", "revealjs", "s5", "slideous", "slidy", + "epub", "epub2", "epub3"].some(f => to.startsWith(f)); + } + export function isLatexOutput(format, defaultTo: string): boolean { … } + // etc. + ``` + Engine-host's `buildFormatNamespace` closes over `context.format.pandocTo` + to provide the default. + +#### src/path/ + +Pure path-string helpers are direct exports (no host dependency): +- [ ] `toForwardSlashes(path)` → `path.replace(/\\/g, "/")` +- [ ] `dirAndStem(file)` → `[dirname(file), basename(file, extname(file))]` +- [ ] `inputFilesDir(input)` → `join(dirname(input), basename(input, ext) + "_files")` +- [ ] `isQmdFile(file)` → check extension + +Host-dependent piece is a factory: +- [ ] `createPath(host)` → returns an `absolute(path)` that uses + `host.realPath` when available, otherwise returns `path` unchanged. The + WASM-host implementation of `absolute` will typically be identity (VFS + paths are already canonical); the Deno host delegates to `Deno.realPathSync`. 
+ +Engine-host-deno's `buildPathNamespace` composes these with the +`runtime(subdir)` and `resource(...parts)` closures that use +`context.runtimeDir` / `context.resourceDir`. + +#### src/system/ + +All of `system/` is host-dependent — expose as a factory: + +- [ ] `createSystem(host: PlatformHost)` returning: + - `execProcess(options)` → uses `host.process.exec()` if available, else throws + `"execProcess is not available in this environment"` + - `tempContext()` — creates temp dir via `host.fs` (Deno: `Deno.makeTempDirSync`; + browser: a VFS-scoped directory). Returns a cleanup helper. + - `onCleanup(handler)` — pure JS; registers in a module-level list processed + on exit / dispose. + - `isInteractiveSession()` → `host.isInteractive` + - `runningInCI()` → `host.isCI` + +Engine-host-deno's `buildSystemNamespace` wraps the factory output with a +`pandoc(args, stdin?)` convenience that uses `context.pandocPath`. (In the +future WASM host, `pandoc` can't be spawned — either route through a WASM +build of pandoc or throw unsupported.) + +#### src/console/ + +- [ ] `src/console/index.ts`: + - `info(message, options?)` → `console.error("[INFO]", message)` (goes to stderr) + - `warning(message, options?)` → `console.error("[WARN]", message)` + - `error(message, options?)` → `console.error("[ERROR]", message)` + - `withSpinner(options, fn)` → log start/end, call fn (no actual spinner in subprocess) + + The `options` parameter (`{ bold, newline, indent, ... }`) is accepted but + formatting hints are best-effort in a subprocess context (no terminal control). + +#### src/crypto/ + +- [ ] `src/crypto/index.ts` — `md5Hash(content)`. Note: Web Crypto doesn't + natively support MD5. Options: `npm:md5`, `node:crypto` (available in + Deno), or a small pure-JS MD5. + +#### Wire-up in engine-host + +- [ ] Update `@quarto/engine-host-deno/src/quarto-api.ts` to import from + `@quarto/api/*` and assemble the QuartoAPI object as shown above. 
+- [ ] Add `@quarto/api` as a dependency of `@quarto/engine-host-deno`. +- [ ] Write a smoke test that invokes each namespace method with trivial + inputs to verify the wiring. + +### Phase 2D: @quarto/types and import map + +Following Quarto 1's model, engine extensions import types via +`import type { ... } from "@quarto/types"`. These are erased during the +build step (bundling), so no runtime code is needed — just a `.d.ts` file +referenced by the import map. + +- [ ] Define our type definitions in `ts-packages/quarto-types/` (or + `resources/extension-build/quarto-types.d.ts`): + - `ExecutionEngineDiscovery`, `ExecutionEngineInstance` + - `ExecuteOptions`, `ExecuteResult`, `ExecutionTarget` + - `QuartoAPI` (with our namespace signatures) + - `MappedString`, `PartitionedMarkdown`, `Metadata` + - `EngineProjectContext` +- [ ] For compatibility with Quarto 1 extensions: our type names should match + Quarto 1's. +- [ ] Create `resources/extension-build/import-map.json`: + ```json + { + "imports": { + "@quarto/types": "./quarto-types.d.ts", + "path": "jsr:@std/path", + "fs/exists": "jsr:@std/fs/exists", + "encoding/base64": "jsr:@std/encoding/base64" + } + } + ``` +- [ ] Create `resources/extension-build/deno.json`: + ```json + { + "compilerOptions": { "strict": true, "lib": ["deno.ns", "DOM", "ES2021"] }, + "importMap": "./import-map.json" + } + ``` +- [ ] Copy `quarto-types.d.ts` into `resources/extension-build/` during the + build process. + +## Portability constraints + +The goal is that `@quarto/api` can later move to its own repo and/or be +consumed by Quarto 1. To keep that option cheap, the plan commits to: + +1. **Self-contained package.** Own `package.json`, own `tsconfig.json`, own + tests. No `../../some-q2-thing` imports. +2. **No Deno globals inside `@quarto/api`.** All platform I/O goes through + the `PlatformHost` interface. `@quarto/api` itself never references + `Deno.*`, `globalThis.Deno`, `node:*`, or platform-specific modules. 
+ This is the invariant that lets the same package run under + `@quarto/engine-host-deno` today and `@quarto/engine-host-wasm` later. +3. **Bootstrap mechanism NOT ported.** We port implementations only, not + Q1's `QuartoAPIRegistry` / `register.ts` / `getQuartoAPI()` singleton. + Engine-host builds the QuartoAPI via direct construction. Q1, if it + adopts `@quarto/api` later, keeps its own registry and just replaces the + provider bodies with calls into our submodules. +4. **ESM + package.exports map.** Committed from day one so bundlers and + Q1's future import paths don't have to be renegotiated. +5. **No cross-package coupling to engine-host.** `@quarto/api` never imports + from `@quarto/engine-host-deno` — the dependency runs only one direction. + q2-specific glue (protocol types, source-map rehydration) lives in + `@quarto/engine-host-deno`, not here. +6. **Publish target deferred, but shape committed.** We don't publish to npm + or jsr yet. When we do, no structural changes should be needed — just add + a `publishConfig` and a version. +7. **Scope naming.** `@quarto/api` is intended to coexist with Q1's existing + `@quarto/types`. If Q1's package layout changes, we coordinate naming. + +## Design Notes + +### Why rewrite instead of extract? + +Quarto 1's markdown utilities are tangled with the YAML schema/validation +system (~30+ files), tree-sitter, mapped-text infrastructure, and lodash. +Clean rewrites of the actual logic are ~50-300 lines per function, vs. +extracting would require bringing 30+ files and stubbing their dependencies. +The logic itself is straightforward — it's the plumbing that's tangled. + +### Why a single `@quarto/api` package? + +Earlier drafts of this plan proposed `@quarto/markdown`, `@quarto/jupyter`, +and `@quarto/engine-host-deno` as sibling packages. We consolidated to a single +`@quarto/api` package with subpath exports because: + +- One `package.json`, one version, one dep list (`yaml` lives once). 
+- Q1 adopts once (`import { ... } from "@quarto/api/markdown"`), not three times. +- MappedString has a natural home (`@quarto/api/text`) without debate over which + sibling owns it. +- Cross-submodule deps (if any) don't require version coordination. +- Tree-shaking via subpath exports gives the same bundle cost as separate packages. +- `git subtree split` can later extract a subdirectory if one piece outgrows the rest. + +`@quarto/engine-host-deno` stays separate because it's q2-specific (stdio protocol, +source-map rehydration). + +### YAML cell options: simplified approach + +Quarto 1's `partitionCellOptions` uses the full YAML schema system to +validate cell options. Our `breakQuartoMd` skips validation and just parses +YAML with `js-yaml`. This means: +- Cell options with typos won't be caught at parse time +- That's fine — validation happens elsewhere in q2's pipeline +- The engine extension just needs the parsed options as a plain object + +### Future: Quarto 1 adoption + +`@quarto/api` is designed so that Q1 could import it in place of its own +tangled implementations (`src/core/lib/mapped-text.ts`, +`src/core/pandoc/pandoc-partition.ts`, etc.). The API signatures match Q1's +existing interfaces. If/when Q1 adopts it, Q1's `QuartoAPIRegistry` keeps +its existing shape but providers delegate to `@quarto/api` submodules. 
+ +## Success Criteria + +- [ ] `@quarto/api` package exists with package.json, tsconfig, exports map +- [ ] `@quarto/api/platform` defines the `PlatformHost` interface +- [ ] No `Deno.*` or `node:*` references anywhere inside `@quarto/api` + (verified by a simple grep check in CI or xtask lint) +- [ ] `@quarto/api/markdown` with extractYaml, partition, getLanguages, breakQuartoMd +- [ ] `@quarto/api/text` with MappedString + helpers (full `.map()` provenance), + `createMappedStringFromFile(host)` factory for FS-backed construction +- [ ] `@quarto/api/format`, `/path`, `/system`, `/console`, `/crypto` all implemented; + `path` and `system` expose host-factory constructors (`createPath`, `createSystem`) +- [ ] `@quarto/engine-host-deno` provides a `denoHost: PlatformHost` and uses it + to build every namespace +- [ ] All QuartoAPI namespaces except jupyter wired into engine-host-deno's + `quarto-api.ts` via direct construction (no registry) +- [ ] `fromSourceMap` in engine-host-deno reconstructs provenance from + byte-range entries +- [ ] `@quarto/types` definitions in place for engine extension imports +- [ ] All tests pass diff --git a/claude-notes/plans/2026-04-16-ts-engine-extensions-subprocess.md b/claude-notes/plans/2026-04-16-ts-engine-extensions-subprocess.md new file mode 100644 index 000000000..aa50a2996 --- /dev/null +++ b/claude-notes/plans/2026-04-16-ts-engine-extensions-subprocess.md @@ -0,0 +1,582 @@ +# Grand Plan: TypeScript Engine Extensions for q2 (v2 — Subprocess) + +## Overview + +Implement TypeScript engine extensions in q2 using a **Deno subprocess** architecture. Engine extensions are TypeScript modules (following Quarto 1's API) that q2 discovers, loads via a long-lived Deno subprocess, and communicates with via a JSON message protocol over stdin/stdout. 
+ +**Goal:** A user places a TypeScript engine extension in `_extensions/my-engine/` with an `_extension.yml`, and q2 discovers it, spawns a Deno engine-host process, queries the engine for file/language claims, and delegates code cell execution to it. + +**Validation target:** The Julia engine extension from Quarto 1 (`julia-engine.ts`). + +**Key design choice:** Subprocess over embedded Deno. This eliminates the need to add deno ext crates (deno_fs, deno_process, deno_net, deno_crypto, etc.) to q2's binary. The engine extension runs in a full Deno environment — all standard APIs available, all Deno standard library modules importable, TypeScript transpilation handled by Deno natively. The QuartoAPI is implemented in TypeScript in a platform-agnostic `@quarto/api` package; platform I/O goes through a `PlatformHost` interface, with the Deno-specific host (`@quarto/engine-host-deno`) providing the `denoHost` that calls `Deno.readTextFileSync`, `Deno.Command`, etc. A future `@quarto/engine-host-wasm` can provide a VFS-backed host for in-browser hosting without changes to `@quarto/api`. + +## Architecture + +### Shared subprocess lifecycle (one Deno process per project render) + +All TS engine extensions share one Deno subprocess. Each engine is loaded +via a separate `Init` message. All other messages carry `engine: "<name>"` +for routing. + +``` +q2 (Rust) engine-host (shared Deno subprocess) +───────── ──────────────────────────────────── +spawn deno engine-host-deno.js ─────→ start, wait for init messages + +send: { type: "init", ──────→ load julia-engine.js + enginePath: "julia..." } call engine.init(quartoAPI), engine.launch(ctx) +recv: { type: "ready", ←────── engine "julia" loaded + engineMeta: { name: "julia", ... } } + +send: { type: "init", ──────→ load marimo-engine.js + enginePath: "marimo..." } call engine.init(quartoAPI), engine.launch(ctx) +recv: { type: "ready", ←────── engine "marimo" loaded + engineMeta: { name: "marimo", ...
} } + +send: { type: "claimsLanguage", ───→ route to julia instance + engine: "julia", call julia.claimsLanguage("julia") + language: "julia" } +recv: { type: "claimsLanguageResult", ←── return result + result: 1 } + +send: { type: "execute", ──────→ route to julia instance + engine: "julia", call julia.execute(opts) + options: { ... } } +recv: { type: "executeResult", ←──── return ExecuteResult + result: { markdown, supporting, ... } } + +send: { type: "shutdown" } ──────→ clean up all engines, exit +``` + +### Where things live + +``` +q2 repo +├── crates/ +│ └── quarto-core/src/engine/ +│ ├── ts_engine.rs ← TsEngine struct (implements ExecutionEngine) +│ ├── ts_protocol.rs ← JSON message types + serialization +│ └── ts_process.rs ← Deno process management +│ +├── ts-packages/ ← npm workspace (already exists) +│ ├── quarto-engine-host-deno/ ← NEW: Deno subprocess harness (q2 native binary) +│ │ ├── src/ +│ │ │ ├── host.ts ← stdin/stdout protocol handler +│ │ │ ├── deno-host.ts ← PlatformHost impl (Deno.* APIs) +│ │ │ ├── quarto-api.ts ← QuartoAPI assembly from @quarto/api + denoHost +│ │ │ ├── mapped-source.ts ← MappedString rehydration from source_map +│ │ │ └── engine-loader.ts ← dynamic import + validation +│ │ └── package.json +│ │ +│ ├── (quarto-engine-host-wasm/ ← FUTURE, out of scope: browser harness for hub-client) +│ │ +│ └── quarto-api/ ← NEW: shared QuartoAPI implementations +│ ├── package.json ← single package, subpath exports +│ └── src/ +│ ├── platform.ts ← PlatformHost interface (no impls) +│ ├── text/ ← MappedString + text utilities +│ ├── markdown/ ← extractYaml, partition, getLanguages, breakQuartoMd +│ ├── jupyter/ ← notebook → markdown + helpers (Plan 3) +│ ├── format/ ← isHtmlCompatible, isLatexOutput, … +│ ├── path/ ← dirAndStem, isQmdFile, createPath(host) +│ ├── system/ ← createSystem(host): execProcess, tempContext, … +│ ├── console/ ← info, warning, error, withSpinner +│ └── crypto/ ← md5Hash +│ +└── quarto-cli (reference only, at 
~/src/quarto-cli) + └── packages/quarto-types/ ← existing, pure .d.ts, use as-is +``` + +## Protocol Design + +### Message format + +All messages are JSON objects, one per line on stdin/stdout. Each has a `type` field. + +**Rust → Deno (stdin):** +```typescript +// Initialize the engine host with context and engine module path +{ type: "init", enginePath: string, context: EngineHostContext } + +// Discovery queries +{ type: "claimsLanguage", language: string, firstClass?: string } +{ type: "claimsFile", file: string, ext: string } + +// File conversion (non-QMD files only — engine reads its native format) +{ type: "markdownForFile", file: string } + +// Execution (q2 provides pre-extracted metadata and source_map) +{ type: "execute", options: TsExecuteOptions } + +// Post-execute +{ type: "dependencies", options: TsDependenciesOptions } +{ type: "postprocess", options: TsPostProcessOptions } +{ type: "postRender", file: TsRenderResultFile } + +// Queries +{ type: "filterFormat", source: string, options: TsRenderOptions, format: TsFormatInfo } +{ type: "canKeepSource", target: TsExecutionTarget } +{ type: "intermediateFiles", input: string } +{ type: "executeTargetSkipped", target: TsExecutionTarget, format: TsFormatInfo } + +// Lifecycle +{ type: "shutdown" } +``` + +**Deno → Rust (stdout):** +```typescript +// Initialization response +{ type: "ready", engineMeta: { name, canFreeze, generatesFigures, validExtensions } } + +// Discovery responses (separate types for language vs file claims) +{ type: "claimsLanguageResult", result: number | null } // null=no claim, 1=default, negative=low priority. 
Harness converts: false→null, true→1, number→Math.trunc() to i32 +{ type: "claimsFileResult", result: boolean } + +// File conversion response (includes source_map for provenance back to original file) +{ type: "markdownForFileResult", result: { value: string, fileName?: string, sourceMap: TsSourceMapEntry[] } } + +// Execution response +{ type: "executeResult", result: TsExecuteResult } + +// Post-execute responses +{ type: "dependenciesResult", result: TsDependenciesResult } +{ type: "postprocessResult" } +{ type: "postRenderResult" } + +// Query responses +{ type: "filterFormatResult", result: TsFormatInfo } +{ type: "canKeepSourceResult", result: boolean } +{ type: "intermediateFilesResult", result: string[] | undefined } +{ type: "executeTargetSkippedResult" } + +// Errors +{ type: "error", message: string, stack?: string } +``` + +**Optional protocol message:** +- `partitionedMarkdown()` — **also on Rust `ExecutionEngine` trait** (Jupyter + needs it for ipynb-filters). Default impl: `partition(markdownForFile(file).value)`. + See [ipynb-filters research plan](2026-04-23-ipynb-filters-and-engine-partitioning.md). + +**Harness-internal** (not protocol messages): +- `target()` — the harness checks if the TS engine implements it, calls it + if so, uses the result (including opaque `data` cookie like kernelspec) to + build `ExecutionTarget` for `execute()`. All Deno-side. Falls back to + constructing from `TsExecuteOptions` fields. + +**Not in protocol:** +- `run()` — interactive mode, deferred to future plan + +See Plan 1a for the full protocol type definitions and rationale. 
+ +### TsExecuteResult + +The execution response maps to q2's `ExecuteResult` plus additional fields from Quarto 1's `ExecuteResult`: + +```typescript +interface TsExecuteResult { + markdown: string; + supporting: string[]; + filters: string[]; + includes?: { + inHeader: string[]; + beforeBody: string[]; + afterBody: string[]; + }; + postProcess?: boolean; + preserve?: Record<string, string>; // HTML chunks to protect from Pandoc + engineDependencies?: Record<string, Array<unknown>>; // widget deps for later resolution + pandoc?: Record<string, unknown>; // pandoc options to merge +} +``` + +**Field rationale:** The Julia engine (our validation target) populates `preserve`, `engineDependencies`, `pandoc`, and `includes` via `quarto.jupyter.toMarkdown()`. These fields are essential for interactive outputs: +- `preserve` + `postProcess`: raw HTML (widgets, DataFrames) replaced with placeholders before Pandoc, restored after +- `engineDependencies`: Jupyter widget CSS/JS deps, resolved later into `PandocIncludes` +- `pandoc`: format-specific pandoc options from `toMarkdown()` +- `includes`: immediate CSS/JS includes (alternative to deferred `engineDependencies`) + +q2's `ExecuteResult` struct currently has `includes` and `needs_postprocess` but not `preserve`, `engineDependencies`, or `pandoc`. These will be added to the Rust struct as part of Plan 1a. `metadata` (from Quarto 1's `ExecuteResult`) is NOT included — the Julia engine doesn't populate it. + +**Note on TsExecuteOptions:** q2 provides pre-extracted `metadata` (from the +AST) and a `source_map` (byte-range entries from Plan 0's SourceInfo) in the +execute options. The engine-host harness uses these to construct the +`ExecutionTarget` and `MappedString` the engine expects — the engine never +calls `target()` or `partitionedMarkdown()`. See Plan 1a for details. + +**Logs (stderr):** Engine extensions write logs to stderr. The `quarto.console.*` methods write to stderr with level prefixes so q2 can parse and display them.
Engine's own `Deno.stdout.writeSync` calls are redirected to stderr (the harness reassigns stdout). + +### EngineHostContext (sent once at init) + +This is a q2 invention — Quarto 1 engines run in-process and don't need serialized +context. `EngineHostContext` carries only static/global and project-level info. +Per-document and per-format info arrives in per-call messages (`TsExecuteOptions`, etc.). + +In Quarto 1, `QuartoAPI` is a global singleton with mostly stateless utility +functions (format helpers take `Format` as parameter, not global state). The +`EngineProjectContext` passed to `launch()` carries project dir and config. +`EngineHostContext` combines the bootstrap info for both. + +```typescript +interface EngineHostContext { + // Project info (→ EngineProjectContext for launch()) + projectDir?: string; + isSingleFile: boolean; + + // Paths for QuartoAPI construction (q2-specific, can't be derived by Deno) + resourceDir: string; // q2's bundled resources + runtimeDir: string; // q2's runtime directory + pandocPath: string; // absolute path to pandoc binary + + // System info for QuartoAPI (q2 is source of truth) + isInteractiveSession: boolean; + runningInCI: boolean; + quartoVersion: string; +} +``` + +Most QuartoAPI methods are implemented in TypeScript using `context` + the platform host. No callbacks to Rust needed. + +## QuartoAPI Implementation Strategy + +### Implemented in TypeScript (no Rust callbacks) + +Implementations live in `@quarto/api` subpaths. Platform I/O is factored +through `PlatformHost` (see Plan 2) so the same package works under +`@quarto/engine-host-deno` today and `@quarto/engine-host-wasm` later. + +| Namespace | Source | Host use | +|-----------|--------|----------| +| `quarto.path` | `@quarto/api/path` — pure string helpers + `createPath(host)` | `host.realPath` for `absolute()`; otherwise none. Engine-host-deno layer adds `runtime(subdir)` / `resource(...parts)` closures over `context.runtimeDir` / `context.resourceDir`. 
| +| `quarto.format` | `@quarto/api/format` — pure computation from `format.pandoc.to` | None. Format info arrives per-call in `TsExecuteOptions.format`, not at init time. Matches Quarto 1 (stateless). | +| `quarto.system` | `@quarto/api/system` — `createSystem(host)` | `host.process.exec` for `execProcess`; `host.fs` for `tempContext`. Throws "not available" in environments where `host.process` is undefined. Engine-host-deno wraps `execProcess` with a `pandoc(args, stdin?)` convenience that uses `context.pandocPath`. | +| `quarto.console` | `@quarto/api/console` — pure, writes to stderr with level prefixes | None. | +| `quarto.crypto` | `@quarto/api/crypto` — `md5Hash` via Web Crypto (`crypto.subtle.digest`) or small pure-JS dep | None. Works in Deno, browser, Node. | +| `quarto.mappedString` | `@quarto/api/text` (same module as `quarto.text`). `fromFile` routed through `createMappedStringFromFile(host)`. | `host.fs.readTextFileSync` for `fromFile`. For `options.target.markdown`, engine-host-deno's `mapped-source.ts` rehydrates a `MappedString` with `.map()` provenance from the `source_map` byte-range entries in `TsExecuteOptions`. | +| `quarto.markdownRegex` | `@quarto/api/markdown` — clean reimplementations | None. Pure parsing. | +| `quarto.jupyter` | `@quarto/api/jupyter` — `createJupyter(host)` (Plan 3) | `host.fs.writeFileSync` for figure image writes; `host.fs.readTextFileSync` for `isPercentScript` / `percentScriptToMarkdown`. The rest of the jupyter conversion logic is pure. | +| `quarto.text` | `@quarto/api/text` — pure string utilities | None. | + +### What's NOT needed from Rust + +None of the QuartoAPI methods call back to Rust. All context flows one way +(Rust → Deno at init time, per-call options on each execute). This keeps +the protocol simple and unidirectional during execution. + +### What's NOT in `@quarto/api` itself + +No references to `Deno.*` or `node:*`. 
Those live in the platform-specific +host implementations (`@quarto/engine-host-deno/src/deno-host.ts` today, +`@quarto/engine-host-wasm/src/wasm-host.ts` in the future). + +## Quarto 1 API Compatibility + +We are NOT targeting 100% Quarto 1 API compatibility. The `@quarto/types` package provides the interface definitions. Our implementations may differ from Quarto 1's in: +- Simplified type signatures (flattened options objects) +- Missing methods that no current engine uses (stubbed with helpful errors) +- Different behavior in edge cases (especially around YAML validation) + +Engine extensions may need minor adaptation to work with q2. The Julia engine is our compatibility benchmark. + +## New TypeScript Packages + +### @quarto/api + +A single shared package holding clean reimplementations of every QuartoAPI +namespace's underlying logic. Organized as subpaths rather than sibling +packages — one `package.json`, one version, one dep list, `exports` map for +targeted imports. Designed to be portable: consumable today by +`@quarto/engine-host-deno`, and in the future by Quarto 1 itself. + +Subpaths: + +- `@quarto/api/text` — `MappedString` (type + impl), `asMappedString`, + `mappedSubstring`, `mappedConcat`, `mappedLines`, `mappedNormalizeNewlines`, + `mappedIndexToLineCol`, `mappedStringFromFile`, plus plain-text utilities + (`lines`, `trimEmptyLines`, `asYamlText`, `postProcessRestorePreservedHtml`). + Powers both `quarto.text` and `quarto.mappedString` on the runtime API surface. +- `@quarto/api/markdown` — `extractYaml`, `partition`, `pandocAttrParseText`, + `getLanguages`, `breakQuartoMd`. Powers `quarto.markdownRegex`. +- `@quarto/api/jupyter` — `jupyterToMarkdown`, `isPercentScript`, + `percentScriptToMarkdown`, `assets`, `resultIncludes`, + `resultEngineDependencies`, plus supporting modules (display-data, tags, + labels, preserve, widgets, pandoc-id, cell-options). Powers `quarto.jupyter`. + Subject of Plan 3. 
+- `@quarto/api/format` — `isHtmlCompatible`, `isLatexOutput`, `isMarkdownOutput`, + `isIpynbOutput`, etc. Pure computation from `pandoc.to` strings. +- `@quarto/api/path` — `dirAndStem`, `isQmdFile`, `toForwardSlashes`, + `inputFilesDir` as pure exports; `createPath(host)` for host-dependent + `absolute()`. +- `@quarto/api/system` — `createSystem(host)` returning `execProcess`, + `tempContext`, `onCleanup`, `isInteractiveSession`, `runningInCI`. All + I/O goes through the platform host — in Deno, `host.process.exec` wraps + `Deno.Command`; in a future WASM host, `execProcess` throws "not available". +- `@quarto/api/console` — `info`, `warning`, `error`, `withSpinner` (stderr writers). +- `@quarto/api/crypto` — `md5Hash`. + +**Runtime-platform expectations:** `@quarto/api` itself contains **no** +references to `Deno.*`, `node:*`, or other platform-specific APIs. All I/O +(file read/write, subprocess execution, path canonicalization) goes through +a small `PlatformHost` interface that the consumer plugs in. This lets the +same `@quarto/api` package serve two environments: + +- `@quarto/engine-host-deno` — the Deno subprocess harness delivered by + Plan 1a. Provides a `denoHost` that calls `Deno.readTextFileSync`, + `Deno.Command`, etc. +- `@quarto/engine-host-wasm` — **future work**, not part of these plans — + the in-browser harness for hub-client. Would provide a `wasmHost` backed + by q2's VFS (`vfsReadFile`, `vfsAddFile`, …). Subprocess-dependent + QuartoAPI methods (`quarto.system.execProcess`, `quarto.system.pandoc`) + would throw "not available in this environment". + +See Plan 2 for the `PlatformHost` interface definition and which submodules +are pure vs. host-dependent. + +**Bootstrap:** No registry pattern. `@quarto/engine-host-deno` imports the +submodules directly and builds the `QuartoAPI` object as plain nested +record — the QuartoAPI registry/singleton infrastructure from Quarto 1 +(`src/core/api/registry.ts`, `register.ts`) is **not** being ported. 
+Implementations only. + +Dependencies: `yaml` (used by `markdown` and `jupyter` for YAML parsing). + +### @quarto/engine-host-deno + +The Deno-side subprocess harness — q2-specific glue for the native binary, +never shared with Q1. Reads JSON messages from stdin, dispatches to the +loaded engine module, writes responses to stdout. Named `-deno` explicitly +to make room for a future `@quarto/engine-host-wasm` sibling. + +- `host.ts` — main loop: read messages, dispatch, write responses +- `deno-host.ts` — the `PlatformHost` implementation backed by Deno APIs + (`Deno.readTextFileSync`, `Deno.Command`, `Deno.realPathSync`, etc.) +- `quarto-api.ts` — imports `@quarto/api/*` submodules, threads `denoHost` + through the host-taking factories, assembles the `QuartoAPI` object from + `EngineHostContext`. Wires both `quarto.text` and `quarto.mappedString` + from the same `@quarto/api/text` module. +- `mapped-source.ts` — rehydrates a `MappedString` from the `source_map` + byte-range entries in `TsExecuteOptions`. Uses a base-per-file cache so + multiple pieces sharing a source file share one base `MappedString` + object. q2-specific (needed because `source_map` crossed the protocol + boundary as data, not in-memory references). +- `engine-loader.ts` — dynamically imports the engine TS module, validates + it exports `ExecutionEngineDiscovery`. + +Dependencies: `@quarto/api`, `@quarto/types`. + +### @quarto/engine-host-wasm (future, out of scope) + +The in-browser equivalent for hub-client. Would provide a `wasmHost` +backed by q2's VFS JS bindings and run inside a Web Worker (or equivalent +sandbox — the mechanism is an open design question). Not part of Plans 1-4. +Called out here to fix the naming and to document that the `PlatformHost` +abstraction in Plan 2 is what enables it without rework to `@quarto/api`. 
+ +## Sub-Plans + +| Plan | Sessions | Dependencies | Can start | +|------|----------|-------------|-----------| +| [Plan 0: Include Expansion & SourceInfo](2026-04-18-plan0-include-expansion-and-source-info.md) | 2-3 | Nothing | Now | +| [Plan 1a: Protocol & Rust Core](2026-04-16-plan1a-protocol-and-core.md) | 1-2 | Plan 0 | After Plan 0 | +| [Plan 1b: @quarto/engine-host-deno (Deno harness)](2026-04-16-plan1b-engine-host-deno.md) | 1 | Plan 1a Phase 1 (protocol schema) | After Plan 1a Phase 1 | +| [Plan 1c: Extension Integration & E2E](2026-04-16-plan1c-extension-integration.md) | 1-2 | Plans 1a, 1b | After Plans 1a + 1b | +| [Plan 2: @quarto/api (text, markdown, utilities) + QuartoAPI assembly](2026-04-16-quarto-markdown-and-api.md) | 1-2 | Nothing | Now (parallel with Plan 0) | +| [Plan 3: @quarto/api/jupyter](2026-04-16-quarto-jupyter.md) | 2-3 | Plan 2A (package skeleton) | After Plan 2A | +| [Plan 4: Julia Validation](2026-04-16-julia-validation.md) | 1-2 | Plans 1a, 1b, 1c, 2, 3 | After all others | +| **Total** | **9-15** | | | + +### Dependency graph + +``` +Plan 0 (Include Expansion & SourceInfo) + │ + ▼ +Plan 1a (Protocol & Rust Core) + │ │ + │ └─(Phase 1: protocol schema frozen)─→ Plan 1b (@quarto/engine-host-deno) + │ │ + └────────────────────┬───────────────────────────┘ + ▼ + Plan 1c (Extension Integration & E2E) + │ + │ +Plan 2 (@quarto/api: text, markdown, utilities) ─┐ + │ ├──→ Plan 4 (Julia Validation) +Plan 3 (@quarto/api/jupyter) ────────────────────┘ +``` + +**Plan 0** is a prerequisite for the TS engine protocol design. It delivers +pre-engine include shortcode expansion (correctness fix for all engines) and +SourceInfo on the engine interface (parity with Quarto 1's MappedString). +Plans 2 and 3 (TypeScript packages) can proceed in parallel since they don't +touch the Rust engine interface. Plan 1a depends on Plan 0 because the +protocol types must account for source mapping decisions made in Plan 0. 
+ +**Plan 1a** delivers the Rust-side infrastructure: protocol types, +subprocess management, `ExecutionEngine` trait extensions, `TsEngine` struct. + +**Plan 1b** is the Deno-side harness (`@quarto/engine-host-deno` package): +esbuild bundle, `host.ts` main loop, `deno-host.ts` PlatformHost impl, +`mapped-source.ts` MappedString rehydration, `quarto-api.ts` stub, +`engine-loader.ts`. Gated only on Plan 1a Phase 1 (the frozen JSON schema); +otherwise runs in parallel with 1a Phases 2-4. + +**Plan 1c** wires 1a + 1b into the extension system: `_extension.yml` +engine parsing, `deno bundle` build step, registry migration to +`StageContext`, 4-phase detection rewrite, and the echo engine end-to-end +test (which exercises the full stack). + +Plans 1a, 1b, 2, and 3 each have a **standalone core** that is independent: +- Plan 1a: Rust infrastructure (Phases 1-4) +- Plan 1b: Deno harness package (Phases 1-4 of 1b, its own numbering) +- Plan 2: `@quarto/api` package skeleton + `text/` and `markdown/` subpaths + types (Phases 2A, 2B, 2D) +- Plan 3: `@quarto/api/jupyter` subpath (Phases 3A-3D, 3F) + +Plan 3 depends on Plan 2 creating the `@quarto/api` package (package.json, +exports map, tsconfig); after that, `jupyter/` is just another subdirectory. + +**Integration phases** that depend on Plan 1b's engine-host package: +- Plan 1c Phase 3 (echo engine E2E test) needs Plan 1b fully working plus + minimal types from Plan 2D +- Plan 2C (wire QuartoAPI namespaces into engine-host) replaces Plan 1b's stubs +- Plan 3E (wire jupyter into engine-host) replaces Plan 1b's jupyter stub + +Plan 4 integrates everything and depends on all plans being complete. + +### Critical path + +With parallel execution: Plan 0 (2-3 sessions) → Plan 1a Phase 1 (schema +freeze) → Plans 1a Phases 2-4, 1b, 2, and 3 in parallel → Plan 1c (1-2 +sessions) → Plan 4 (1-2 sessions) = **6-10 sessions elapsed**. 
+ +## Key File Paths (q2) + +| Component | Path | +|-----------|------| +| Engine trait | `crates/quarto-core/src/engine/traits.rs` | +| Engine registry | `crates/quarto-core/src/engine/registry.rs` | +| Engine detection | `crates/quarto-core/src/engine/detection.rs` | +| Engine execution stage | `crates/quarto-core/src/stage/stages/engine_execution.rs` | +| Existing TS packages | `ts-packages/` | +| npm workspace config | `package.json` (workspaces: `ts-packages/*`) | + +## Key File Paths (quarto-cli, for reference) + +**Note:** quarto-cli is a separate repository at `~/src/quarto-cli`. It is the TypeScript/Deno implementation of Quarto 1. We reference it for API definitions and implementation patterns but do not import from it. + +| Component | Path | +|-----------|------| +| Julia engine | `src/resources/extension-subtrees/julia-engine/src/julia-engine.ts` | +| Julia _extension.yml | `src/resources/extension-subtrees/julia-engine/_extensions/julia-engine/_extension.yml` | +| @quarto/types | `packages/quarto-types/` | +| QuartoAPI types | `packages/quarto-types/src/quarto-api.ts` | +| ExecutionEngineDiscovery | `src/execute/types.ts` | +| fileExecutionEngine | `src/execute/engine.ts` (4-phase algorithm) | +| markdownExecutionEngine | `src/execute/engine.ts` (language scanning + claiming) | +| resolveEngines | `src/execute/engine.ts` (engine registration + ordering) | +| resolveEngineExtensions | `src/project/project-context.ts` (extension → project config) | +| languagesWithClasses | `src/core/pandoc/pandoc-partition.ts` (code block language extraction) | +| Extension schema | `src/resources/schema/extension.yml` (contributes.engines schema) | +| jupyterToMarkdown | `src/core/jupyter/jupyter.ts` | +| JupyterToMarkdownResult | `src/core/jupyter/types.ts` (includes htmlPreserve, dependencies, pandoc) | +| Markdown regex | `src/core/api/markdown-regex.ts` → `src/core/pandoc/pandoc-partition.ts`, `src/core/lib/break-quarto-md.ts` | +| Engine template | 
`src/resources/create/extensions/engine/src/qstart-filesafename-qend.ejs.ts` | + +## Extension Build Model + +Following Quarto 1's approach, engine extensions go through a **build step** before execution: + +1. **Build time:** Engine extension TS source is bundled into a single `.js` file using `deno bundle` with an import map. The import map resolves: + - `@quarto/types` → type definitions (erased during bundling, type-only imports) + - `"path"` → `jsr:@std/path` + - `"fs/exists"` → `jsr:@std/fs/exists` + - `"encoding/base64"` → `jsr:@std/encoding/base64` + All dependencies are inlined into the bundle. + +2. **Runtime:** The Deno subprocess loads the bundled `.js` file via dynamic `import()`. No import map or TS transpilation needed at execution time. + +The `@quarto/engine-host-deno` harness is also bundled into a single `.js` file that includes `@quarto/api` (all subpaths) and the harness glue. This bundle is built using **esbuild** (matching the existing `quarto-system-runtime` pattern), checked into git at `ts-packages/quarto-engine-host-deno/dist/engine-host-deno.js`, and embedded in the q2 binary via `include_str!()`. At runtime, the embedded JS is written to a temp file and executed with `deno run --allow-all`. + +q2 provides: +- `resources/extension-build/import-map.json` — import map for building extensions +- `resources/extension-build/deno.json` — Deno config pointing to the import map +- A `quarto build-ts-extension` command (or auto-build during render) + +## Runtime Dependency + +**Deno must be on PATH** to use TS engine extensions (same model as pandoc — assumed present, not bundled). q2 should: +- Check for `deno` in PATH when a TS engine is needed +- Provide a clear error message if Deno is not found +- Document the Deno requirement for engine extension users +- The core q2 binary (markdown, knitr, jupyter engines) does NOT require Deno + +**Future:** Deno may be bundled with q2 when a distribution/installer pipeline is built. 
For now, q2 has no installer infrastructure (pandoc is also assumed on PATH). Tests that require Deno should be skipped if it's absent, following the same pattern as pandoc-dependent tests. + +## Engine Discovery and Language Claiming + +q2 follows Quarto 1's 4-phase engine detection algorithm, with one modernization. + +### Quarto 1's algorithm (reference) + +In Quarto 1, `fileExecutionEngine()` uses this 4-phase algorithm: +1. **Extension claims (`claimsFile`)**: Each engine's `claimsFile(file, ext)` is checked — `.ipynb` → jupyter, `.rmd` → knitr +2. **YAML declaration**: Check for explicit `engine:` key or engine-name top-level key in frontmatter +3. **Language scanning (`claimsLanguage`)**: Extract languages from code blocks via `languagesWithClasses()`, call each engine's `claimsLanguage(language, firstClass?)`. Returns `false` (no claim), `true` (priority 1), or a number (custom priority). Highest score wins. +4. **Fallback**: If no engine claims any language but there are non-handler languages (not `ojs`, etc.), default to Jupyter. Otherwise, use markdown engine. + +`claimsLanguage` receives both the language identifier and an optional `firstClass` extracted from code block attributes (e.g., `{python .marimo}` → language="python", firstClass="marimo"). This allows engines to make class-specific claims. + +Engine ordering affects ties: `_quarto.yml` `engines:` list controls priority order. User-specified engines come first; standard engines follow. First engine to achieve the highest score wins. + +### q2's algorithm (modernized) + +q2 implements the same 4-phase algorithm with one change: + +**Modernization — Jupyter no longer claims "julia" explicitly.** In Quarto 1, Jupyter's `claimsLanguage` returned `true` for "julia", creating a conflict with the Julia engine extension (both claimed priority 1, winner depended on registration order). 
In q2: + +- The built-in Jupyter engine does NOT explicitly claim any language via `claims_language()` (matching Quarto 1's behavior for Python, which also relied on the Phase 4 fallback). Critically, it no longer claims `"julia"` either. +- The Julia engine extension claims `"julia"`. When installed, it wins cleanly with no ordering tricks needed. +- **Phase 4 is preserved:** If no engine explicitly claims a language but there ARE code blocks with unrecognized computational languages, Jupyter is the fallback — because Jupyter can handle any language it has a kernel for. This is correct behavior, not a hack: Jupyter is a universal kernel executor. If no Julia extension is installed and a doc has `{julia}` blocks, Jupyter handles it via its Julia kernel (same as Quarto 1's end result, but without the conflicting explicit claim). +- If there are no computational code blocks at all, the document falls through to the markdown engine. + +### Implementation on the `ExecutionEngine` trait + +Discovery methods are added to the `ExecutionEngine` trait with defaults (Option A from design discussion): + +```rust +pub trait ExecutionEngine: Send + Sync { + // ... existing methods ... + + /// File extensions this engine can handle (e.g., [".ipynb", ".py"]). + /// Used as a pre-filter before any claiming logic. + fn valid_extensions(&self) -> Vec<String> { Vec::new() } + + /// Whether this engine claims a language. + /// Returns None (no claim), or Some(priority) where higher wins. + /// Negative values mean "I'll take this if no one else will." + /// `first_class` is the first CSS class from code block attributes + /// (e.g., "marimo" from `{python .marimo}`). + fn claims_language(&self, _language: &str, _first_class: Option<&str>) -> Option<i32> { None } + + /// Whether this engine claims a file by extension. + fn claims_file(&self, _file: &str, _ext: &str) -> bool { false } +} +``` + +Built-in engines implement these directly: +- **Jupyter**: `claims_file(".ipynb") → true`.
No `claims_language` overrides — Jupyter does not explicitly claim any language via Phase 3. Instead, it acts as the Phase 4 fallback for all unclaimed computational languages (matching Quarto 1, where Python also falls through to the Phase 4 Jupyter fallback). **Deliberate q2 interface change:** Jupyter no longer claims "julia" explicitly (Quarto 1 did this as a backward-compatibility hack), removing the priority conflict with the Julia extension. +- **Knitr**: `claims_language("r") → Some(1)`, `claims_file(".rmd") → true` +- **Markdown**: returns defaults (claims nothing) +- **TsEngine**: forwards queries to the Deno subprocess + +Language + class information is extracted from the **parsed AST** (not regex), since q2 already has pampa for parsing. + +### Engine ordering and registration + +Following Quarto 1's `resolveEngineExtensions()` + `resolveEngines()` pipeline: + +1. Extension discovery scans `_extensions/` for `contributes: engines:` entries +2. Extension engines are merged into `projectConfig.engines` +3. `_quarto.yml` `engines:` list controls ordering (user-specified engines first) +4. Standard engines (knitr, jupyter, markdown) are appended after user-specified engines +5. 
When two engines have the same priority score for a language, iteration order wins diff --git a/claude-notes/plans/2026-04-18-plan0-include-expansion-and-source-info.md b/claude-notes/plans/2026-04-18-plan0-include-expansion-and-source-info.md new file mode 100644 index 000000000..afe292553 --- /dev/null +++ b/claude-notes/plans/2026-04-18-plan0-include-expansion-and-source-info.md @@ -0,0 +1,614 @@ +# Plan 0: Pre-Engine Include Expansion & Engine SourceInfo + +**Grand plan:** [2026-04-16-ts-engine-extensions-subprocess.md](2026-04-16-ts-engine-extensions-subprocess.md) +**Depends on:** Nothing +**Blocks:** Plans 1a, 1b, 2, 3, 4 (all TS engine extension work) +**Estimated sessions:** 2-3 + +## Overview + +Establish two prerequisites for TS engine extensions (and improve correctness +for all engines): + +1. **Include shortcode expansion before engine execution** — Without this, + `{{< include file.qmd >}}` containing code cells is broken: the included + code never reaches the engine. Quarto 1 resolves includes pre-engine (in + TypeScript, at the text level). q2 currently resolves ALL shortcodes + post-engine (in `ShortcodeResolveTransform`, stage 6). This plan adds a + new pre-engine stage that resolves include shortcodes at the AST level. + +2. **SourceInfo for the text engines receive** — Quarto 1 passes a + `MappedString` (string + source provenance) to engines. q2's engine + interface currently passes a bare `&str` with no provenance. This plan + adds `SourceInfo` to `ExecutionContext`, constructed by having the QMD + writer track which AST node produced each byte range in its output. This + gives engines (or q2's error handling) the ability to map error positions + back to original source files — including through include boundaries. 
+ +### Why this must come before the TS engine plans + +The TS engine protocol design depends on knowing: +- Whether the engine receives text with includes already expanded (yes) +- Whether source mapping data is available for the engine's input (yes) +- Who is responsible for error position remapping (open — see below) + +Without Plan 0, the protocol would be designed around the wrong assumptions. + +## Context: How Quarto 1 handles this + +In Quarto 1, shortcodes are expanded in **two phases**: + +- **Pre-engine (TypeScript layer):** `include` shortcodes are resolved at the + text level in `projectResolveFullMarkdownForFile()` → `expandIncludes()`. + The result is a `MappedString` that tracks which ranges came from which + files. This expanded text is what the engine receives. + +- **Post-engine (Lua filter pipeline):** All other shortcodes (`meta`, `var`, + `env`, custom extension shortcodes) survive engine execution unchanged and + are expanded during the `quarto-pre` Lua filter phase in Pandoc. + +Quarto 1's `MappedString` provenance is available to engines but **barely +used for error remapping**: Jupyter passes errors through raw, knitr remaps +only one specific R error pattern, Julia builds sourceRanges but doesn't +remap errors on the way back. Only OJS actually uses it for error line +translation. + +Our goal is **parity with Quarto 1**: SourceInfo exists, is correct, is +available to engines, and is thoroughly tested — but systematic error +remapping is deferred as an open question. 
+ +## Pipeline change + +``` +Current: + Parse → MetadataMerge → PreEngineSugaring → EngineExec → CompileThemeCss → + UserFilters(pre) → AstTransforms(ALL shortcodes) → UserFilters(post) → + RenderHtmlBody → ApplyTemplate + +After Plan 0: + Parse → MetadataMerge → IncludeExpansion(NEW) → PreEngineSugaring → EngineExec → + CompileThemeCss → UserFilters(pre) → AstTransforms(non-include shortcodes) → + UserFilters(post) → RenderHtmlBody → ApplyTemplate +``` + +**Ordering rationale:** `IncludeExpansion` must run before `PreEngineSugaring` +because included files may contain cross-references that `PreEngineSugaring` +needs to index (it seeds `RefTypeRegistry` and builds `CrossrefIndex`). Both +must run before `EngineExec`. + +**Note on non-QMD files:** Plan 1c restructures the pipeline entry point so +that `claimsFile` (engine detection Phase 1) runs before `ParseDocument`. +If an engine claims a non-QMD file (e.g., `.jl` percent script), the engine +converts it to QMD text via `markdownForFile`, and that text enters the +pipeline at `ParseDocument`. Plan 0's work (include expansion, QMD writer +SourceInfo) applies equally regardless of whether the input started as QMD +or was converted from another format — by the time include expansion runs, +the AST is the same either way. + +## Prerequisites + +### Prerequisite: `Block::source_info()` and `Inline::source_info()` accessors + +Every `Block` variant's inner struct has `pub source_info: SourceInfo`, but +there is no enum-level accessor. The codebase has **4 independent copies** of +a `get_block_source_info` / `block_source_info` free function scattered +across pampa (`writers/incremental.rs`, `pandoc/treesitter.rs`, +`pandoc/treesitter_utils/postprocess.rs`, `lua/diagnostics.rs`) plus one +in a test file (`tests/incremental_writer_investigation.rs`). A similar +`get_inline_source_info` exists in `lua/diagnostics.rs`. + +**Action (separate commit before Phase 0A/0B):** +1. 
Add `impl Block { pub fn source_info(&self) -> &SourceInfo }` in + `quarto-pandoc-types/src/block.rs` — match on all variants. Every + variant's inner struct has a `source_info` field, so this always + returns a reference. +2. **Restructure `Inline::Attr`** to include a `source_info: SourceInfo` + field. Currently `Inline::Attr(Attr, AttrSourceInfo)` is a tuple + variant where `Attr` is `(String, Vec<String>, LinkedHashMap<...>)` — + it has no `SourceInfo`. `AttrSourceInfo` tracks per-component source + locations and has a `combined()` method that merges them into a single + `SourceInfo`. Add a precomputed `source_info: SourceInfo` field + (computed from `AttrSourceInfo::combined()` at construction time). + Update all `Inline::Attr` construction sites. +3. Add `impl Inline { pub fn source_info(&self) -> &SourceInfo }` in + `quarto-pandoc-types/src/inline.rs` — match on all variants. With the + `Attr` restructuring, every variant now has a `source_info` field to + borrow from, giving both enums the uniform `-> &SourceInfo` API. +4. Replace all 4+ duplicate free functions with calls to the new methods. +5. Run `cargo nextest run --workspace` to verify no regressions. + +This is a standalone cleanup that Phase 0B (QMD writer tracking) and +Phase 0A (AST walking) both benefit from. + +## Commit order + +Four commits, in this order: + +1. **Prerequisite** — `Inline::Attr` restructure + `source_info()` accessors + dedup +2. **Phase 0B** — QMD writer `write_with_source_info` + its unit tests +3. **Phase 0C wiring** — `ExecutionContext` fields + `serialize_ast_to_qmd` update + single-file SourceInfo tests +4. **Phase 0A** — Include expansion stage + all include tests (unit tests, integration tests, and tests that verify SourceInfo through include boundaries) + +Phases 0B and 0C are include-independent SourceInfo infrastructure. 
They +land first so that when Phase 0A adds include expansion, the full +SourceInfo chain (include → QMD writer → ExecutionContext → map_offset) +can be tested end-to-end in the same commit. + +## Work Items + +### Phase 0A: Include shortcode expansion stage (commit 4) + +New pipeline stage that resolves `include` shortcodes in the AST before +engine execution. + +**Note:** q2 does not currently have an `include` shortcode handler. The +existing `ShortcodeResolveTransform` handles `meta` (built-in Rust handler) +and Lua-based shortcodes, but not `include`. This phase implements include +handling from scratch. + +- [x] Create `crates/quarto-core/src/stage/stages/include_expansion.rs`: + ```rust + pub struct IncludeExpansionStage; + ``` + Implements `PipelineStage`. Input/output: `DocumentAst`. + +- [x] Implement AST walking to find include shortcodes: + - Walk all blocks looking for `Paragraph` nodes whose sole inline content + is an `Inline::Shortcode` where `name == "include"` + - **Block-level only:** Quarto 1 only expands includes that occupy an + entire line (`isBlockShortcode` in `parse-shortcode.ts` uses regex + `^\s*{{< ... >}}\s*$`). The AST-level equivalent is: the shortcode is + the only child of a `Paragraph`. If an include shortcode appears inline + among other inlines (e.g., `text {{< include f.qmd >}} more`), leave it + in place — `ShortcodeResolveTransform` will encounter it later and can + warn or pass it through. This matches Quarto 1 behavior where inline + includes are silently not expanded. + - Extract the file path from the shortcode's first positional argument + +- [x] Implement include resolution using the **parse-then-remap pattern** + (same approach as `EngineExecutionStage` at engine_execution.rs:267-311): + + 1. Resolve the included file path relative to the including file's directory + 2. Read the included file via `ctx.runtime.file_read(&path)` — use the + `SystemRuntime` trait (not `std::fs::read`) so this works in WASM contexts + 3. 
Parse the included file with pampa (`readers::qmd::read`). This creates + a fresh `ASTContext` where the included file is `FileId(0)`. + 4. **Remap FileIds**: The main document already uses `FileId(0)` (and + possibly higher for earlier includes). Register the included file in + the main document's `ast_context.source_context` to get a new `FileId` + (e.g., `FileId(N)`). Then call `remap_file_ids` on the parsed AST to + shift `FileId(0) → FileId(N)`. Use the existing + `quarto_ast_reconcile::remap_file_ids` or the `SourceInfo::remap_file_ids` + method. + 5. **Register in BOTH SourceContexts on DocumentAst** (they serve different + purposes and both need the included file): + - `doc_ast.ast_context.source_context` — carry over the `FileInformation` + from the parsed file's `ASTContext` (needed for `map_offset` line/column + resolution). Use `add_file_with_info` if `FileInformation` is available, + otherwise `add_file`. + - `doc_ast.source_context` — register with `add_file(path, Some(content))` + so ariadne can render error snippets from included files. + - Both registrations must use the same `FileId(N)`. + 6. Merge the included file's `ast_context.filenames` into the main document's + `ast_context.filenames`. + 7. Replace the `Paragraph` containing the shortcode with the included + file's blocks (after stripping the included file's YAML frontmatter — + i.e., take `parsed.blocks` and discard `parsed.meta`) + +- [x] Handle recursive includes: + - After splicing, re-walk the newly inserted nodes for more include shortcodes + - Maintain a set of files currently being included (detect circular includes) + - Error on circular includes with a clear diagnostic + +- [x] Handle edge cases: + - Missing included file → diagnostic error (not a panic) + - Include path outside project directory → warning + - Include of a file with YAML frontmatter: Quarto 1 strips the frontmatter + of included files. Match this behavior. 
(The included file's YAML is + parsed but discarded; only its body content is spliced.) + +- [x] Wire into pipeline in `pipeline.rs`: + - Insert `IncludeExpansionStage` between `MetadataMergeStage` and + `PreEngineSugaringStage` (before `EngineExecutionStage`). Include + expansion must precede PreEngineSugaring because included files may + contain cross-references that need indexing. + - `ShortcodeResolveTransform` in `AstTransforms` continues to handle + `meta`, `var`, `env`, Lua shortcodes — it simply won't encounter any + include shortcodes (they're already resolved) + +- [x] Tests: + - Unit test: simple include — paragraph with shortcode replaced by included + file's blocks + - Unit test: recursive include — file A includes file B which includes file C + - Unit test: circular include — A includes B includes A → error diagnostic + - Unit test: missing file → error diagnostic, not panic + - Unit test: included file's AST nodes have SourceInfo pointing to the + included file (correct FileId, byte offsets) + - Unit test: include inside a code block is NOT expanded (it's literal text + in CodeBlock.text, not a Shortcode node) + - Unit test: block-level include (paragraph with only the shortcode) → + included blocks replace the paragraph + - Unit test: inline include (shortcode among other inlines in a paragraph) + → shortcode is NOT expanded, left in place for ShortcodeResolveTransform + (matches Quarto 1 behavior where only whole-line includes are expanded) + - Unit test: included file with YAML frontmatter → frontmatter stripped, + only body blocks spliced + - Integration test: document with `{{< include >}}` containing a code cell + → after include expansion, the code cell's CodeBlock is present in the AST + - Integration test (end-to-end SourceInfo through includes): full pipeline + with include → engine receives text → SourceInfo maps byte offset in + engine input back to included file + - Integration test: verify `map_offset` works for a code block from an + 
included file (offset in serialized QMD → correct file + line in the + included source) + +### Phase 0B: QMD writer produces SourceInfo (commit 2) + +Extend the QMD writer to build a `SourceInfo::Concat` that maps byte ranges +in the serialized output to the `source_info` of the AST nodes that produced +them. + +**Quarto 1 reference:** Quarto 1 doesn't need this because it does include +expansion at the text level (producing a MappedString directly). In q2, +include expansion happens at the AST level, and the engine receives +serialized QMD — so the serializer must construct the provenance. + +- [x] Add `write_with_source_info` to pampa's QMD writer: + ```rust + // New public API — owns buffer, returns bytes + SourceInfo + pub fn write_with_source_info( + pandoc: &Pandoc, + ) -> Result<(Vec<u8>, SourceInfo), Vec> + ``` + The existing `write(&Pandoc, &mut impl Write)` is unchanged. All ~19 + other callsites are unaffected. + + The new function owns a `Vec<u8>` internally so it can read `buf.len()` + at block boundaries. It calls a `write_impl_tracked` variant of the + 15-line top-level loop that records `buf.len()` before/after each + `write_block` call. The entire `write_block` → `write_inline` → 40 + internal helper tree is shared and untouched. + +- [x] Track provenance for the **entire output** with no gaps: + + The Concat must tile the full output buffer so that `SourceInfo::concat()` + (which computes cumulative `offset_in_concat` values) produces correct + offsets. Any gap would shift all subsequent pieces, causing lookups by + engine-reported byte offsets to land in the wrong piece. 
+ + `write_impl_tracked` works as follows: + + ```rust + let mut pieces = Vec::new(); + + // Track YAML frontmatter as a single piece + let meta_start = buf.len(); + let mut need_newline = write_config_value_meta(&pandoc.meta, buf, ctx)?; + let meta_len = buf.len() - meta_start; + if meta_len > 0 { + pieces.push((pandoc.meta.source_info.clone(), meta_len)); + } + + // Track each block — include preceding blank line in measurement + for block in &pandoc.blocks { + let start = buf.len(); + if need_newline { writeln!(buf)?; } + write_block(block, buf, ctx)?; + pieces.push((block.source_info().clone(), buf.len() - start)); + need_newline = true; + } + + Ok(SourceInfo::concat(pieces)) + ``` + + By measuring each block from **before** the separating blank line, the + pieces tile the entire buffer with no gaps. The blank line between blocks + is attributed to the following block (at worst one line off within a + block, which is acceptable). YAML frontmatter is tracked via + `pandoc.meta.source_info`. + + **Known limitation:** After `MetadataMergeStage`, `pandoc.meta.source_info` + may be `SourceInfo::default()` due to a pre-existing bug in + `MergedConfig::materialize()` that drops map container source_info + (tracked as `bd-2mxo`). This means byte offsets landing in the YAML + frontmatter region of the serialized QMD will resolve to "origin unknown" + rather than pointing to the actual frontmatter location. Individual + metadata scalar values retain their source_info, but the container does + not. Fixing this is orthogonal to Plan 0. + + Per-top-level-block is sufficient for the engine use case: engine errors + report line numbers, lines fall within blocks, and blocks carry SourceInfo + pointing to their origin file (including through include boundaries). + Finer granularity (per-inline) can be added later if needed by + instrumenting the internal write functions. 
+ + **Accuracy note:** Code block content is written verbatim + (`write!(buf, "{}", codeblock.text)`), so within-block byte offsets for + code are exact. Only fencing/attribute formatting may differ from the + original source, making within-block mapping approximate by at most a + few bytes of fence overhead. For engine error reporting (which targets + code lines, not fence lines), this is negligible. + +- [x] Handle blocks with `SourceInfo::default()` (no provenance): + record a Concat piece with default SourceInfo. `map_offset` through + default SourceInfo resolves to `FileId(0)` offset 0 — callers should + treat unexpected locations as "origin unknown." + +- [x] The wrapper `serialize_ast_to_qmd` in `engine_execution.rs` calls the + new API and returns `(String, SourceInfo)`: + ```rust + fn serialize_ast_to_qmd(ast: &Pandoc) -> Result<(String, SourceInfo), PipelineError> + ``` + +- [x] Tests: + - Unit test: serialize a simple AST, verify the returned SourceInfo is a + Concat with pieces covering the **entire** output (frontmatter + blocks) + - Unit test: given a byte offset in a block's region, `map_offset` + resolves to the correct original file and position + - Unit test: given a byte offset in the YAML frontmatter region, + `map_offset` resolves to the frontmatter's source location (note: + after metadata merge, `meta.source_info` may be default due to + `bd-2mxo` — test with a pre-merge AST or a manually constructed + meta with real source_info) + - Unit test: AST with blocks from two different files (simulating include + expansion) → SourceInfo maps to the correct file for each block + - Unit test: Concat piece lengths sum to total buffer length (no gaps) + - Unit test: round-trip accuracy — parse a file, serialize, pick a code + block's offset in serialized text, verify it maps back to approximately + the right location in the original file + +### Phase 0C: SourceInfo in ExecutionContext (commit 3) + +Wire the QMD writer's SourceInfo into the engine 
interface. Include- +dependent integration tests are deferred to Phase 0A's commit. + +- [x] Add `source_info` field to `ExecutionContext`: + ```rust + pub struct ExecutionContext { + // ... existing fields ... + + /// Source provenance for the input text. + /// + /// Maps byte offsets in the input `&str` back to original source + /// files (possibly through include expansion boundaries). + /// + /// Use `source_info.map_offset(byte_offset, source_context)` to + /// resolve a position in the engine's input text to the original + /// file, line, and column. + /// + /// Currently not used by any engine for error remapping — see + /// "Open Questions" in the plan. Available for future use and + /// for parity with Quarto 1's MappedString. + pub source_info: SourceInfo, + } + ``` + +- [x] Add `source_context: Arc<SourceContext>` to `ExecutionContext`: + `map_offset` requires a `&SourceContext` to resolve `FileId`s to paths + and compute line/column. The engine (or q2's error handling) needs both + `source_info` and `source_context`. + + **Decision:** Clone into `Arc` at `ExecutionContext` construction. + `DocumentAst.source_context` remains owned (`SourceContext`, not + `Arc<SourceContext>`) — the include expansion stage needs to mutate it + (register included files), and it's simpler to keep it owned during the + mutable pipeline phases. At `EngineExecutionStage` time, the context is + finalized (all includes resolved), so we clone into `Arc` once: + `Arc::new(doc_ast.source_context.clone())`. This is a one-time clone per + pipeline run, not a hot path. + + No changes to `DocumentAst`'s field types. No migration of downstream + consumers. + + For TsEngine (subprocess engines), `TsEngine::execute()` extracts the + serialized source map entries from `source_info` for the protocol — + the full SourceContext stays Rust-side. 
+ +- [x] Update `EngineExecutionStage::run()`: + - `serialize_ast_to_qmd` now returns `(String, SourceInfo)` + - Pass the `SourceInfo` into `ExecutionContext` when constructing it + - Clone `DocumentAst.source_context` into `Arc::new(...)` and pass to + `ExecutionContext` (one-time clone; context is finalized after include + expansion) + +- [x] Update `ExecutionContext::new()` to accept SourceInfo (with a default + of `SourceInfo::default()` for backward compatibility in tests) + +- [x] **Do NOT change the `ExecutionEngine` trait signature.** SourceInfo is + in `ExecutionContext`, not a separate parameter. Existing engine + implementations don't need to change. + +- [x] Tests (single-file, no includes — include-dependent tests are in 0A): + - Unit test: `ExecutionContext` with SourceInfo — construct, verify field + accessible + - Unit test: `EngineExecutionStage` populates SourceInfo from QMD writer + - Integration test: document WITHOUT includes → SourceInfo maps back to + the original file + - Integration test: verify `map_offset` works for offsets at: + - Start of the engine input + - A code block in the middle + - End of the engine input + - Integration test: simulate engine error reporting — given a line number + in the serialized QMD, convert to byte offset, call `map_offset`, verify + correct file + line in original source + +## Open Question: Error remapping responsibility + +When an engine reports an error with a line number, who translates it back +to the original source position? + +**Options:** +1. **q2 intercepts engine errors** — The engine returns + `ExecutionError::ExecutionFailedAtLines` with line numbers in the + serialized QMD. q2 uses SourceInfo + SourceContext to remap before + displaying. +2. **Engine does it** — TS engines receive the source map in + `TsExecuteOptions` (reconstructed as MappedString by the harness). + Built-in engines have SourceInfo in `ExecutionContext`. 
Either way + the engine can remap positions itself before returning errors. +3. **QuartoAPI utility** — For TS engines, the QuartoAPI provides a + `quarto.sourceMap.resolve(line, col)` method backed by the + MappedString the harness constructed from the source map. +4. **Nobody does it** — Matching Quarto 1's current (lax) behavior, error + line numbers are approximate. Engines report positions in the text they + received; users must mentally map to their source. + +Option 1 is the most natural for q2's architecture (q2 holds the SourceInfo, +engines are oblivious). Options 2-3 are needed if the engine wants to +provide real-time error locations during long-running execution (like Julia's +`buildSourceRanges`). These are not mutually exclusive. + +**Decision deferred.** Plan 0 ensures the SourceInfo exists and is correct. +Error remapping can be implemented incrementally as engines need it. + +## Design Notes + +### SourceInfo and MappedString are the same concept + +q2's `SourceInfo` and Quarto 1's `MappedString` are different +implementations of the same idea: source provenance tracking. Both answer +"for any byte offset in this derived text, where did it come from in the +original source?" + +- **SourceInfo**: a serializable tree (Concat of pieces → Original file + ranges). Designed for cross-process communication. `map_offset()` traces + the tree. +- **MappedString**: closures and object references. `.map(index)` returns + `{ index, originalString }`. Designed for in-process use, never + serialized. + +The protocol naturally uses the SourceInfo representation (byte-range +pieces), and the engine-host harness reconstructs a MappedString from it. +See Plan 1a Phase 1 (`TsSourceMapEntry`) and Phase 5 (MappedString +reconstruction) for the crossing. 
+ +### Percent scripts: engine-side, not q2-side + +Percent-script conversion (`.jl` files with `# %%` markers → QMD) is +engine-specific: different engines check different file extensions and use +different comment syntaxes. In q2, the engine handles this via `claimsFile` ++ `markdownForFile` in the pre-parse detection flow (see Plan 1c Phase 2). + +q2 does not need to know about percent or spin script formats. The engine +converts its file format to QMD, q2 parses the QMD, and the pipeline +proceeds. Source mapping for the conversion step is the engine's +responsibility (Quarto 1 doesn't do it either — percent script conversion +loses provenance, producing an identity-mapped MappedString with no +filename). + +Built-in engines do not currently implement `claims_file` or +`markdown_for_file`. Adding percent/spin script support to built-in +engines is documented as future work in Plan 1c. + +### Why AST-level, not text-level + +Quarto 1 does include expansion at the text level (regex/pattern matching +on raw markdown). q2 does it at the AST level because: +- q2 parses first, then works on the AST — there is no "expanded text" +- The parser already identifies shortcode nodes, so no regex needed +- Source tracking composes naturally: included file → pampa parse with + FileId → AST nodes with SourceInfo::Original → QMD writer Concat +- Avoids the pitfalls of text-level expansion (matching shortcodes inside + code blocks, handling nested delimiters, etc.) + +### SourceInfo chain + +The QMD writer's Concat tiles the **entire** serialized output with no +gaps — frontmatter is tracked via `pandoc.meta.source_info`, each block +is tracked via `block.source_info()`, and inter-block whitespace is +included in each block's measured range. 
+ +``` +byte offset in serialized QMD (what the engine receives) + → QMD writer's Concat piece → AST node's source_info + → Original(FileId for included_file.qmd, byte range) + → SourceContext.map_offset() → file path, line, column +``` + +For nodes from the main document, the chain is shorter: +``` +byte offset → Concat piece → Original(FileId for main.qmd, byte range) + → file path, line, column +``` + +For offsets in YAML frontmatter: +``` +byte offset → Concat piece (frontmatter) → meta.source_info + → Original(FileId for main.qmd, frontmatter byte range) + → file path, line, column +``` +**Note:** Due to `bd-2mxo`, `meta.source_info` is currently +`SourceInfo::default()` after metadata merge, so this chain resolves +to "origin unknown" until that bug is fixed. + +### Dual SourceContext in DocumentAst + +`DocumentAst` has two separate `SourceContext` fields: + +1. **`ast_context.source_context`** — created by pampa's reader. Contains + `FileInformation` (line break indices) that `map_offset()` needs for + byte-offset → line/column conversion. This is what AST nodes' `FileId`s + resolve against. + +2. **`source_context` (top-level field)** — created by `ParseDocumentStage`. + Contains file content strings (via `add_file(path, Some(content))`). + Used by ariadne for rendering error snippets with source context. + +This separation is semi-intentional: different layers own different contexts. +`SourceContext.add_file(path, Some(content))` stores **both** content and +`FileInformation`, so a single entry can serve both purposes — but the two +contexts are separate objects created at different times. + +**For include expansion:** each included file must be registered in **both** +contexts with the same `FileId`, so that: +- `map_offset` can resolve AST nodes from included files (needs #1) +- Error messages can show source snippets from included files (needs #2) + +Unifying the two `SourceContext`s is desirable long-term but out of scope +for Plan 0. 
+ +### Parse-then-remap pattern for multi-file merging + +When parsing an included file, `pampa::readers::qmd::read` always creates +a fresh `ASTContext` where the file is `FileId(0)`. To merge into the main +document (which already uses `FileId(0)` for the main file), we use the +**parse-then-remap** pattern established by `EngineExecutionStage` +(engine_execution.rs:267-311): + +1. Parse the included file → gets its own `FileId(0)` +2. Register in the main `SourceContext` → gets new `FileId(N)` +3. Call `remap_file_ids` on the parsed AST → `FileId(0)` becomes `FileId(N)` +4. Merge filenames lists +5. Splice remapped blocks into main AST + +This pattern requires no changes to pampa's reader API. The reader always +starts fresh; the caller remaps and merges. This is the standard approach +throughout the codebase — `quarto_ast_reconcile::remap_file_ids` provides +the shared utility for walking and remapping. + +### No changes to ShortcodeResolveTransform + +After include expansion, include shortcode nodes are gone from the AST — +replaced by the included content. `ShortcodeResolveTransform` in stage 6 +continues to handle `meta`, Lua shortcodes, and extension shortcodes. It +simply won't encounter include shortcodes. No changes needed. + +### No changes to ExecutionEngine trait + +SourceInfo goes in `ExecutionContext`, not the trait signature. Built-in +engines (`MarkdownEngine`, `JupyterEngine`, `KnitrEngine`) don't need any +implementation changes. They can optionally use `ctx.source_info` for error +remapping in the future. 
+ +## Success Criteria + +- [x] Include shortcodes resolved before engine execution +- [x] Recursive includes work; circular includes produce clear error +- [x] Included code cells are visible to the engine (the whole point) +- [x] QMD writer produces SourceInfo mapping serialized text to AST nodes +- [x] SourceInfo in ExecutionContext maps engine input back to original files +- [x] `map_offset` resolves through include boundaries to correct file + line +- [x] All existing tests pass (no regressions) +- [x] Thorough unit tests for SourceInfo chain, even though no engine uses it +- [x] `cargo nextest run --workspace` passes +- [x] Error remapping responsibility documented as open question diff --git a/claude-notes/plans/2026-04-23-ipynb-filters-and-engine-partitioning.md b/claude-notes/plans/2026-04-23-ipynb-filters-and-engine-partitioning.md new file mode 100644 index 000000000..bc11c74d6 --- /dev/null +++ b/claude-notes/plans/2026-04-23-ipynb-filters-and-engine-partitioning.md @@ -0,0 +1,143 @@ +# Research Plan: ipynb-filters and Engine Partitioning + +**Status:** Research — future work, not part of the TS engine extensions project +**Depends on:** TS engine extensions (Plans 1a/1b), native Jupyter engine +**Context:** This plan was identified during review of the TS engine extensions +grand plan. The protocol and trait additions needed to *support* this plan are +included in Plans 1a/1b (the `partitioned_markdown` trait method and +`PartitionedMarkdown` protocol message). This plan covers the *implementation* +of ipynb-filters and the pipeline integration to use them. + +## Motivation + +Quarto 1's `ipynb-filters` feature allows user-provided subprocesses to transform +notebook JSON before execution. Filter-injected YAML metadata can override format +configuration. Two callers observe the filtered output outside the execute path: + +1. 
**Format resolution** (render-contexts.ts:632): harvests filter-injected YAML + and merges it into the already-resolved format config, pre-execute. +2. **Project indexing** (project-index.ts:102): builds per-file index entries + (title, headingText, draft) from filtered content for website navigation, + book chapter listings, blog entries, and cross-document link resolution. + +In practice, `ipynb-filters` is only specified at project level (`_quarto.yml`) +or in extensions, never per-document. This simplifies the integration — we don't +need full metadata merging before knowing whether filters are in play. + +## What's already in Plans 1a/1b + +The TS engine extensions project delivers the infrastructure this plan builds on: + +- **`partitioned_markdown` on the `ExecutionEngine` trait** (Plan 1a Phase 3) — + with default impl `partition(markdown_for_file(file).value)`. Jupyter overrides. +- **`PartitionedMarkdown` protocol message** (Plan 1a Phase 1) — so TS engine + extensions can also implement `partitionedMarkdown`. +- **`TsPartitionedMarkdown` protocol type** (Plan 1a appendix). +- **`EngineMeta.has_partitioned_markdown`** — TsEngine forwards to subprocess + if the engine reports having it, falls back to default impl otherwise. +- **`target()` as harness-internal** — the harness checks if the TS engine + implements it, calls it if so, keeps the result (including opaque `data` + cookie) on the Deno side. Not a protocol message or Rust trait method. + +## Scope of this plan + +1. **ipynb-filter subprocess execution** in the native Rust Jupyter engine. +2. **Pipeline integration** for the format resolution YAML harvest. +3. **Project index integration** for filter-aware project scanning. +4. **`partition_markdown()` utility function** and `PartitionedMarkdown` Rust type. 
+ +## Background + +### What partitionedMarkdown returns + +Quarto 1's `PartitionedMarkdown`: +```typescript +{ yaml?: Metadata, headingText?: string, headingAttr?: PandocAttr, + containsRefs: boolean, markdown: string, srcMarkdownNoYaml: string } +``` + +What callers actually use: +- **Format resolution**: only `yaml` (to merge filter-injected YAML into format) +- **Project indexing**: `yaml.title`, `yaml.draft`, `headingText`, plus stores + the full partition in the index entry + +### How ipynb-filters work (Quarto 1) + +Filters are subprocesses: each receives notebook JSON on stdin, outputs +transformed JSON on stdout. Filters chain (output of one → input of next). +Results are cached on disk keyed by mtime of notebook + all filter scripts. + +The filter list comes from `format.execute["ipynb-filters"]`. + +### How target() is used + +In Quarto 1, `target()` is called before format resolution. Its `metadata` +feeds format resolution, and its `data` cookie (e.g., Jupyter's kernelspec) +is passed through to `execute()`. + +In q2, `target()` is harness-internal for TS engines and not needed on the +Rust side because: +- q2's pipeline extracts metadata from the parsed AST (via pampa) +- q2 constructs execution context from the AST, not from a target() call + +## Research Items + +### R1: Rust-side `PartitionedMarkdown` type + +**Resolved in Plan 1a.** The struct has all 6 fields matching Quarto 1, +using q2-native types (`ConfigValue` for yaml, `PandocAttr` for heading +attributes). `TsEngine` converts from the protocol type +(`TsPartitionedMarkdown`) at the boundary. + +### R2: `partition_markdown()` utility function + +Needed for the default `partitioned_markdown` trait implementation (delivered +by Plan 1a). Splits QMD text into YAML frontmatter, first heading, and body. 
+Options: +- String-based splitter (regex or manual — lightweight, no tree-sitter) +- Use pampa's parser (accurate, but heavier — do we want a pampa dependency + in the engine trait's default impl?) + +The default trait impl is `partition(markdown_for_file(file).value)`, so +this function receives QMD text (already converted from percent/spin scripts). + +### R3: Native Jupyter engine ipynb-filter implementation + +The Rust Jupyter engine needs to override `partitioned_markdown` to: +- Read notebook JSON from disk +- Run filter subprocesses (stdin/stdout JSON chaining) +- Cache filtered results (mtime-based, matching Quarto 1's scheme) +- Convert filtered notebook JSON → markdown text +- Partition the markdown + +**Open question:** How much of the notebook→markdown conversion does the +native Jupyter engine already have? What's the gap? + +### R4: Pipeline integration for format resolution YAML harvest + +After `MetadataMergeStage`, check if merged config has `execute.ipynb-filters`. +If so, call `engine.partitioned_markdown(file, Some(&format))` and merge the +returned YAML into the format. + +This could be: +- A step inside `EngineExecutionStage` (before the main execute call) +- A new mini-stage between MetadataMerge and IncludeExpansion +- A hook in MetadataMerge itself + +**Open question:** Where exactly in the pipeline? The engine must already be +identified (from `ctx.claimed_engine_name` or detection). + +Since ipynb-filters are project-level only, the pipeline knows about them +from `_quarto.yml` before parsing any document — no chicken-and-egg. + +### R5: Project index integration + +When q2 builds a project-wide index (website nav, book chapters, etc.), +notebooks with ipynb-filters need `partitioned_markdown` called with the +resolved format. This requires: +- Engine subprocess running during project scanning +- Format resolution available per-file (at least enough to determine + ipynb-filters presence) + +q2 doesn't have project-wide nav indexing yet. 
Document as an integration +point for when that feature is built. diff --git a/crates/experiments/reconcile-viewer/src/main.rs b/crates/experiments/reconcile-viewer/src/main.rs index 82fd11d1a..4d917e0bd 100644 --- a/crates/experiments/reconcile-viewer/src/main.rs +++ b/crates/experiments/reconcile-viewer/src/main.rs @@ -148,7 +148,7 @@ fn inline_type_name(inline: &Inline) -> &'static str { Inline::Span(_) => "Span", Inline::Shortcode(_) => "Shortcode", Inline::NoteReference(_) => "NoteReference", - Inline::Attr(_, _) => "Attr", + Inline::Attr(_) => "Attr", Inline::Insert(_) => "Insert", Inline::Delete(_) => "Delete", Inline::Highlight(_) => "Highlight", diff --git a/crates/pampa/src/filters.rs b/crates/pampa/src/filters.rs index 09edc0b59..cb525a403 100644 --- a/crates/pampa/src/filters.rs +++ b/crates/pampa/src/filters.rs @@ -4,7 +4,7 @@ */ use crate::filter_context::FilterContext; -use crate::pandoc::{self, AsInline, Block, Blocks, Inline, Inlines, MetaBlock}; +use crate::pandoc::{self, AsInline, Block, Blocks, Inline, InlineAttr, Inlines, MetaBlock}; use quarto_pandoc_types::{ConfigMapEntry, ConfigValue, ConfigValueKind}; // filters are destructive and take ownership of the input @@ -237,15 +237,14 @@ impl_inline_filterable_terminal!( NoteReference ); -// Attr is special because it has two fields (Attr, AttrSourceInfo) -// We need a custom impl that preserves attr_source -// However, filters don't actually work on Attr values directly, -// so this is just a placeholder that should never be called -impl InlineFilterableStructure for (pandoc::Attr, crate::pandoc::attr::AttrSourceInfo) { +// Attr is special — it wraps an InlineAttr struct. +// Filters don't actually work on Attr values directly, +// so this is just a placeholder that should never be called. 
+impl InlineFilterableStructure for InlineAttr { fn filter_structure(self, _: &mut Filter, _ctx: &mut FilterContext) -> Inline { // Note: This should not be called in practice because Attr inlines // are stripped during postprocessing before filters run - Inline::Attr(self.0, self.1) + Inline::Attr(self) } } @@ -600,19 +599,18 @@ pub fn topdown_traverse_inline( Inline::NoteReference(note_ref) => { handle_inline_filter!(NoteReference, note_ref, note_reference, filter, ctx) } - Inline::Attr(attr, attr_source) => { - // Special handling for Attr since it has two fields and filters don't actually work on Attr tuples - // Attr inlines should be stripped during postprocessing before filters run - // So this branch should rarely be hit + Inline::Attr(inline_attr) => { + // Special handling for Attr — filters don't actually work on Attr values. + // Attr inlines should be stripped during postprocessing before filters run. if let Some(f) = &mut filter.inline { - let inline = Inline::Attr(attr, attr_source); + let inline = Inline::Attr(inline_attr); match f(inline.clone(), ctx) { FilterReturn::Unchanged(_) => vec![inline], FilterReturn::FilterResult(result, _should_recurse) => result, } } else { vec![traverse_inline_structure( - Inline::Attr(attr, attr_source), + Inline::Attr(inline_attr), filter, ctx, )] @@ -877,7 +875,7 @@ pub fn traverse_inline_structure( // extensions Inline::Shortcode(_) => inline, Inline::NoteReference(_) => inline, - Inline::Attr(_, _) => inline, + Inline::Attr(_) => inline, _ => traverse_inline_nonterminal(inline, filter, ctx), } } @@ -1502,20 +1500,20 @@ mod tests { let mut filter = Filter::new().with_inlines(|inlines, _ctx| FilterReturn::Unchanged(inlines)); let mut ctx = FilterContext::new(); - let inline = Inline::Attr(empty_attr(), AttrSourceInfo::empty()); + let inline = Inline::Attr(InlineAttr::new(empty_attr(), AttrSourceInfo::empty())); let result = topdown_traverse_inline(inline, &mut filter, &mut ctx); assert_eq!(result.len(), 1); - 
assert!(matches!(result[0], Inline::Attr(_, _))); + assert!(matches!(result[0], Inline::Attr(_))); } #[test] fn test_traverse_attr_without_filter() { let mut filter = Filter::new(); let mut ctx = FilterContext::new(); - let inline = Inline::Attr(empty_attr(), AttrSourceInfo::empty()); + let inline = Inline::Attr(InlineAttr::new(empty_attr(), AttrSourceInfo::empty())); let result = topdown_traverse_inline(inline, &mut filter, &mut ctx); assert_eq!(result.len(), 1); - assert!(matches!(result[0], Inline::Attr(_, _))); + assert!(matches!(result[0], Inline::Attr(_))); } // === Tests for block traversal === @@ -2122,9 +2120,9 @@ mod tests { fn test_inline_filterable_structure_attr() { let mut filter = Filter::new(); let mut ctx = FilterContext::new(); - let attr_tuple = (empty_attr(), AttrSourceInfo::empty()); - let result = attr_tuple.filter_structure(&mut filter, &mut ctx); - assert!(matches!(result, Inline::Attr(_, _))); + let inline_attr = InlineAttr::new(empty_attr(), AttrSourceInfo::empty()); + let result = inline_attr.filter_structure(&mut filter, &mut ctx); + assert!(matches!(result, Inline::Attr(_))); } // === Tests for BlockFilterableStructure implementations === diff --git a/crates/pampa/src/lua/diagnostics.rs b/crates/pampa/src/lua/diagnostics.rs index 319d3b713..608788422 100644 --- a/crates/pampa/src/lua/diagnostics.rs +++ b/crates/pampa/src/lua/diagnostics.rs @@ -14,7 +14,6 @@ use quarto_source_map::{FileId, SourceInfo, SourcePiece}; use std::sync::Arc; use super::types::{LuaBlock, LuaInline}; -use crate::pandoc::{Block, Inline}; /// Register the quarto namespace with diagnostic functions pub fn register_quarto_namespace(lua: &Lua) -> Result<()> { @@ -149,83 +148,18 @@ fn source_info_from_lua_table(table: &Table) -> Result { // Helper Functions for Extracting SourceInfo from Elements // ============================================================================ -/// Extract SourceInfo from an Inline element -/// -/// Returns None for element types that 
don't have source_info (Shortcode, Attr) -fn get_inline_source_info(inline: &Inline) -> Option { - match inline { - Inline::Str(s) => Some(s.source_info.clone()), - Inline::Emph(e) => Some(e.source_info.clone()), - Inline::Underline(u) => Some(u.source_info.clone()), - Inline::Strong(s) => Some(s.source_info.clone()), - Inline::Strikeout(s) => Some(s.source_info.clone()), - Inline::Superscript(s) => Some(s.source_info.clone()), - Inline::Subscript(s) => Some(s.source_info.clone()), - Inline::SmallCaps(s) => Some(s.source_info.clone()), - Inline::Quoted(q) => Some(q.source_info.clone()), - Inline::Cite(c) => Some(c.source_info.clone()), - Inline::Code(c) => Some(c.source_info.clone()), - Inline::Space(s) => Some(s.source_info.clone()), - Inline::SoftBreak(s) => Some(s.source_info.clone()), - Inline::LineBreak(l) => Some(l.source_info.clone()), - Inline::Math(m) => Some(m.source_info.clone()), - Inline::RawInline(r) => Some(r.source_info.clone()), - Inline::Link(l) => Some(l.source_info.clone()), - Inline::Image(i) => Some(i.source_info.clone()), - Inline::Note(n) => Some(n.source_info.clone()), - Inline::Span(s) => Some(s.source_info.clone()), - Inline::Insert(i) => Some(i.source_info.clone()), - Inline::Delete(d) => Some(d.source_info.clone()), - Inline::Highlight(h) => Some(h.source_info.clone()), - Inline::EditComment(e) => Some(e.source_info.clone()), - Inline::NoteReference(n) => Some(n.source_info.clone()), - Inline::Custom(c) => Some(c.source_info.clone()), - // These element types don't have source_info - Inline::Shortcode(_) => None, - Inline::Attr(_, _) => None, - } -} - -/// Extract SourceInfo from a Block element -fn get_block_source_info(block: &Block) -> SourceInfo { - match block { - Block::Plain(p) => p.source_info.clone(), - Block::Paragraph(p) => p.source_info.clone(), - Block::LineBlock(l) => l.source_info.clone(), - Block::CodeBlock(c) => c.source_info.clone(), - Block::RawBlock(r) => r.source_info.clone(), - Block::BlockQuote(b) => 
b.source_info.clone(), - Block::OrderedList(o) => o.source_info.clone(), - Block::BulletList(b) => b.source_info.clone(), - Block::DefinitionList(d) => d.source_info.clone(), - Block::Header(h) => h.source_info.clone(), - Block::HorizontalRule(h) => h.source_info.clone(), - Block::Table(t) => t.source_info.clone(), - Block::Figure(f) => f.source_info.clone(), - Block::Div(d) => d.source_info.clone(), - Block::BlockMetadata(b) => b.source_info.clone(), - Block::NoteDefinitionPara(n) => n.source_info.clone(), - Block::NoteDefinitionFencedBlock(n) => n.source_info.clone(), - Block::CaptionBlock(c) => c.source_info.clone(), - Block::Custom(c) => c.source_info.clone(), - } -} - /// Extract SourceInfo from an AST element (Inline or Block) and convert to Lua table fn extract_source_info_from_element(lua: &Lua, elem: &Value) -> Result> { if let Value::UserData(ud) = elem { // Try to extract source info from Inline element if let Ok(lua_inline) = ud.borrow::() { - if let Some(si) = get_inline_source_info(&lua_inline.borrow_inline()) { - return Ok(Some(source_info_to_lua_table(lua, &si)?)); - } - // Element type without source_info (Shortcode, Attr) - return None - return Ok(None); + let inline = lua_inline.borrow_inline(); + return Ok(Some(source_info_to_lua_table(lua, inline.source_info())?)); } // Try to extract source info from Block element if let Ok(lua_block) = ud.borrow::() { - let si = get_block_source_info(&lua_block.borrow_block()); - return Ok(Some(source_info_to_lua_table(lua, &si)?)); + let block = lua_block.borrow_block(); + return Ok(Some(source_info_to_lua_table(lua, block.source_info())?)); } } // Not a recognized element type @@ -823,976 +757,76 @@ mod tests { } // ========================================================================= - // Tests for get_inline_source_info - covering all Inline variants + // Tests for Inline::source_info() and Block::source_info() moved to + // quarto-pandoc-types. 
The duplicate free functions they tested have been + // replaced by the enum methods. // ========================================================================= - #[test] - fn test_get_inline_source_info_emph() { - use crate::pandoc::Inline; - use crate::pandoc::inline::Emph; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(1), - start_offset: 10, - end_offset: 20, - }; - let emph = Inline::Emph(Emph { - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(&emph), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_underline() { - use crate::pandoc::Inline; - use crate::pandoc::inline::Underline; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(2), - start_offset: 20, - end_offset: 30, - }; - let underline = Inline::Underline(Underline { - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(&underline), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_strong() { - use crate::pandoc::Inline; - use crate::pandoc::inline::Strong; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(3), - start_offset: 30, - end_offset: 40, - }; - let strong = Inline::Strong(Strong { - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(&strong), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_strikeout() { - use crate::pandoc::Inline; - use crate::pandoc::inline::Strikeout; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(4), - start_offset: 40, - end_offset: 50, - }; - let strikeout = Inline::Strikeout(Strikeout { - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(&strikeout), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_superscript() { - use 
crate::pandoc::Inline; - use crate::pandoc::inline::Superscript; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(5), - start_offset: 50, - end_offset: 60, - }; - let superscript = Inline::Superscript(Superscript { - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(&superscript), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_subscript() { - use crate::pandoc::Inline; - use crate::pandoc::inline::Subscript; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(6), - start_offset: 60, - end_offset: 70, - }; - let subscript = Inline::Subscript(Subscript { - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(&subscript), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_smallcaps() { - use crate::pandoc::Inline; - use crate::pandoc::inline::SmallCaps; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(7), - start_offset: 70, - end_offset: 80, - }; - let smallcaps = Inline::SmallCaps(SmallCaps { - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(&smallcaps), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_quoted() { - use crate::pandoc::Inline; - use crate::pandoc::inline::{QuoteType, Quoted}; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(8), - start_offset: 80, - end_offset: 90, - }; - let quoted = Inline::Quoted(Quoted { - quote_type: QuoteType::DoubleQuote, - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info("ed), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_cite() { - use crate::pandoc::Inline; - use crate::pandoc::inline::Cite; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: 
FileId(9), - start_offset: 90, - end_offset: 100, - }; - let cite = Inline::Cite(Cite { - citations: vec![], - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(&cite), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_code() { - use crate::pandoc::Inline; - use crate::pandoc::attr::AttrSourceInfo; - use crate::pandoc::inline::Code; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(10), - start_offset: 100, - end_offset: 110, - }; - let code = Inline::Code(Code { - attr: (String::new(), vec![], LinkedHashMap::new()), - text: "code".to_string(), - source_info: source_info.clone(), - attr_source: AttrSourceInfo::empty(), - }); - assert_eq!(get_inline_source_info(&code), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_space() { - use crate::pandoc::Inline; - use crate::pandoc::inline::Space; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(11), - start_offset: 110, - end_offset: 111, - }; - let space = Inline::Space(Space { - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(&space), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_softbreak() { - use crate::pandoc::Inline; - use crate::pandoc::inline::SoftBreak; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(12), - start_offset: 120, - end_offset: 121, - }; - let softbreak = Inline::SoftBreak(SoftBreak { - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(&softbreak), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_linebreak() { - use crate::pandoc::Inline; - use crate::pandoc::inline::LineBreak; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(13), - start_offset: 130, - end_offset: 131, - }; - let linebreak = 
Inline::LineBreak(LineBreak { - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(&linebreak), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_math() { - use crate::pandoc::Inline; - use crate::pandoc::inline::{Math, MathType}; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(14), - start_offset: 140, - end_offset: 150, - }; - let math = Inline::Math(Math { - math_type: MathType::InlineMath, - text: "x^2".to_string(), - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(&math), Some(source_info)); - } + // ========================================================================= + // Tests for error paths and edge cases + // ========================================================================= #[test] - fn test_get_inline_source_info_rawinline() { - use crate::pandoc::Inline; - use crate::pandoc::inline::RawInline; - use quarto_source_map::FileId; + fn test_source_info_from_lua_table_unknown_type_error() { + let lua = Lua::new(); + let table = lua.create_table().unwrap(); + table.set("t", "Unknown").unwrap(); - let source_info = SourceInfo::Original { - file_id: FileId(15), - start_offset: 150, - end_offset: 160, - }; - let rawinline = Inline::RawInline(RawInline { - format: "html".to_string(), - text: "".to_string(), - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(&rawinline), Some(source_info)); + let result = source_info_from_lua_table(&table); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("Unknown SourceInfo type")); } #[test] - fn test_get_inline_source_info_link() { - use crate::pandoc::Inline; - use crate::pandoc::attr::{AttrSourceInfo, TargetSourceInfo}; - use crate::pandoc::inline::Link; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(16), - start_offset: 160, - end_offset: 170, 
- }; - let link = Inline::Link(Link { - attr: (String::new(), vec![], LinkedHashMap::new()), - content: vec![], - target: ("url".to_string(), "title".to_string()), - source_info: source_info.clone(), - attr_source: AttrSourceInfo::empty(), - target_source: TargetSourceInfo::empty(), - }); - assert_eq!(get_inline_source_info(&link), Some(source_info)); - } + fn test_extract_source_info_non_userdata_returns_none() { + let lua = Lua::new(); + register_quarto_namespace(&lua).unwrap(); - #[test] - fn test_get_inline_source_info_image() { - use crate::pandoc::Inline; - use crate::pandoc::attr::{AttrSourceInfo, TargetSourceInfo}; - use crate::pandoc::inline::Image; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; + // Pass a non-userdata value (a table) as the element argument + lua.load( + r#" + local t = {} + quarto.warn("Test warning", t) + "#, + ) + .set_name("@test.lua") + .exec() + .unwrap(); - let source_info = SourceInfo::Original { - file_id: FileId(17), - start_offset: 170, - end_offset: 180, - }; - let image = Inline::Image(Image { - attr: (String::new(), vec![], LinkedHashMap::new()), - content: vec![], - target: ("image.png".to_string(), String::new()), - source_info: source_info.clone(), - attr_source: AttrSourceInfo::empty(), - target_source: TargetSourceInfo::empty(), - }); - assert_eq!(get_inline_source_info(&image), Some(source_info)); + // Should still work, falling back to stack location + let diagnostics = extract_lua_diagnostics(&lua).unwrap(); + assert_eq!(diagnostics.len(), 1); + // Should have FilterProvenance since the element wasn't recognized + match &diagnostics[0].location { + Some(SourceInfo::FilterProvenance { .. 
}) => {} + other => panic!( + "Expected FilterProvenance for non-userdata element, got {:?}", + other + ), + } } #[test] - fn test_get_inline_source_info_note() { + fn test_quarto_warn_with_shortcode_element_uses_source_info() { use crate::pandoc::Inline; - use crate::pandoc::inline::Note; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(18), - start_offset: 180, - end_offset: 190, - }; - let note = Inline::Note(Note { - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(¬e), Some(source_info)); - } + use quarto_pandoc_types::shortcode::Shortcode; + use std::collections::HashMap; - #[test] - fn test_get_inline_source_info_span() { - use crate::pandoc::Inline; - use crate::pandoc::attr::AttrSourceInfo; - use crate::pandoc::inline::Span; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(19), - start_offset: 190, - end_offset: 200, - }; - let span = Inline::Span(Span { - attr: (String::new(), vec![], LinkedHashMap::new()), - content: vec![], - source_info: source_info.clone(), - attr_source: AttrSourceInfo::empty(), - }); - assert_eq!(get_inline_source_info(&span), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_insert() { - use crate::pandoc::Inline; - use crate::pandoc::attr::AttrSourceInfo; - use crate::pandoc::inline::Insert; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(20), - start_offset: 200, - end_offset: 210, - }; - let insert = Inline::Insert(Insert { - attr: (String::new(), vec![], LinkedHashMap::new()), - content: vec![], - source_info: source_info.clone(), - attr_source: AttrSourceInfo::empty(), - }); - assert_eq!(get_inline_source_info(&insert), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_delete() { - use crate::pandoc::Inline; - use 
crate::pandoc::attr::AttrSourceInfo; - use crate::pandoc::inline::Delete; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(21), - start_offset: 210, - end_offset: 220, - }; - let delete = Inline::Delete(Delete { - attr: (String::new(), vec![], LinkedHashMap::new()), - content: vec![], - source_info: source_info.clone(), - attr_source: AttrSourceInfo::empty(), - }); - assert_eq!(get_inline_source_info(&delete), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_highlight() { - use crate::pandoc::Inline; - use crate::pandoc::attr::AttrSourceInfo; - use crate::pandoc::inline::Highlight; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(22), - start_offset: 220, - end_offset: 230, - }; - let highlight = Inline::Highlight(Highlight { - attr: (String::new(), vec![], LinkedHashMap::new()), - content: vec![], - source_info: source_info.clone(), - attr_source: AttrSourceInfo::empty(), - }); - assert_eq!(get_inline_source_info(&highlight), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_editcomment() { - use crate::pandoc::Inline; - use crate::pandoc::attr::AttrSourceInfo; - use crate::pandoc::inline::EditComment; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(23), - start_offset: 230, - end_offset: 240, - }; - let editcomment = Inline::EditComment(EditComment { - attr: (String::new(), vec![], LinkedHashMap::new()), - content: vec![], - source_info: source_info.clone(), - attr_source: AttrSourceInfo::empty(), - }); - assert_eq!(get_inline_source_info(&editcomment), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_notereference() { - use crate::pandoc::Inline; - use crate::pandoc::inline::NoteReference; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - 
file_id: FileId(24), - start_offset: 240, - end_offset: 250, - }; - let noteref = Inline::NoteReference(NoteReference { - id: "note1".to_string(), - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(¬eref), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_custom() { - use crate::pandoc::Inline; - use crate::pandoc::custom::CustomNode; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(25), - start_offset: 250, - end_offset: 260, - }; - let custom = Inline::Custom(CustomNode { - type_name: "test".to_string(), - slots: LinkedHashMap::new(), - plain_data: serde_json::Value::Null, - attr: (String::new(), vec![], LinkedHashMap::new()), - source_info: source_info.clone(), - }); - assert_eq!(get_inline_source_info(&custom), Some(source_info)); - } - - #[test] - fn test_get_inline_source_info_shortcode_returns_none() { - use crate::pandoc::Inline; - use quarto_pandoc_types::shortcode::Shortcode; - use std::collections::HashMap; + let lua = Lua::new(); + register_quarto_namespace(&lua).unwrap(); + // Shortcode has source_info — Inline::source_info() returns it directly let shortcode = Inline::Shortcode(Shortcode { is_escaped: false, name: "test".to_string(), positional_args: vec![], keyword_args: HashMap::new(), - source_info: quarto_source_map::SourceInfo::default(), - }); - // Shortcode now has source_info, but we return None for now - // (this may change when shortcode resolution is implemented) - assert_eq!(get_inline_source_info(&shortcode), None); - } - - #[test] - fn test_get_inline_source_info_attr_returns_none() { - use crate::pandoc::Inline; - use crate::pandoc::attr::AttrSourceInfo; - use hashlink::LinkedHashMap; - - let attr = Inline::Attr( - (String::new(), vec![], LinkedHashMap::new()), - AttrSourceInfo::empty(), - ); - assert_eq!(get_inline_source_info(&attr), None); - } - - // 
========================================================================= - // Tests for get_block_source_info - covering all Block variants - // ========================================================================= - - #[test] - fn test_get_block_source_info_plain() { - use crate::pandoc::Block; - use crate::pandoc::block::Plain; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(1), - start_offset: 0, - end_offset: 10, - }; - let plain = Block::Plain(Plain { - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_block_source_info(&plain), source_info); - } - - #[test] - fn test_get_block_source_info_lineblock() { - use crate::pandoc::Block; - use crate::pandoc::block::LineBlock; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(2), - start_offset: 10, - end_offset: 20, - }; - let lineblock = Block::LineBlock(LineBlock { - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_block_source_info(&lineblock), source_info); - } - - #[test] - fn test_get_block_source_info_codeblock() { - use crate::pandoc::Block; - use crate::pandoc::attr::AttrSourceInfo; - use crate::pandoc::block::CodeBlock; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(3), - start_offset: 20, - end_offset: 30, - }; - let codeblock = Block::CodeBlock(CodeBlock { - attr: (String::new(), vec![], LinkedHashMap::new()), - text: "code".to_string(), - source_info: source_info.clone(), - attr_source: AttrSourceInfo::empty(), - }); - assert_eq!(get_block_source_info(&codeblock), source_info); - } - - #[test] - fn test_get_block_source_info_rawblock() { - use crate::pandoc::Block; - use crate::pandoc::block::RawBlock; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(4), - start_offset: 30, - end_offset: 40, - }; - let rawblock = 
Block::RawBlock(RawBlock { - format: "html".to_string(), - text: "
".to_string(), - source_info: source_info.clone(), - }); - assert_eq!(get_block_source_info(&rawblock), source_info); - } - - #[test] - fn test_get_block_source_info_blockquote() { - use crate::pandoc::Block; - use crate::pandoc::block::BlockQuote; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(5), - start_offset: 40, - end_offset: 50, - }; - let blockquote = Block::BlockQuote(BlockQuote { - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_block_source_info(&blockquote), source_info); - } - - #[test] - fn test_get_block_source_info_orderedlist() { - use crate::pandoc::Block; - use crate::pandoc::block::OrderedList; - use crate::pandoc::list::{ListNumberDelim, ListNumberStyle}; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(6), - start_offset: 50, - end_offset: 60, - }; - let orderedlist = Block::OrderedList(OrderedList { - attr: (1, ListNumberStyle::Default, ListNumberDelim::Default), - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_block_source_info(&orderedlist), source_info); - } - - #[test] - fn test_get_block_source_info_bulletlist() { - use crate::pandoc::Block; - use crate::pandoc::block::BulletList; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(7), - start_offset: 60, - end_offset: 70, - }; - let bulletlist = Block::BulletList(BulletList { - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_block_source_info(&bulletlist), source_info); - } - - #[test] - fn test_get_block_source_info_definitionlist() { - use crate::pandoc::Block; - use crate::pandoc::block::DefinitionList; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(8), - start_offset: 70, - end_offset: 80, - }; - let deflist = Block::DefinitionList(DefinitionList { - content: vec![], - source_info: source_info.clone(), - }); - 
assert_eq!(get_block_source_info(&deflist), source_info); - } - - #[test] - fn test_get_block_source_info_header() { - use crate::pandoc::Block; - use crate::pandoc::attr::AttrSourceInfo; - use crate::pandoc::block::Header; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(9), - start_offset: 80, - end_offset: 90, - }; - let header = Block::Header(Header { - level: 1, - attr: (String::new(), vec![], LinkedHashMap::new()), - content: vec![], - source_info: source_info.clone(), - attr_source: AttrSourceInfo::empty(), - }); - assert_eq!(get_block_source_info(&header), source_info); - } - - #[test] - fn test_get_block_source_info_horizontalrule() { - use crate::pandoc::Block; - use crate::pandoc::block::HorizontalRule; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(10), - start_offset: 90, - end_offset: 93, - }; - let hrule = Block::HorizontalRule(HorizontalRule { - source_info: source_info.clone(), - }); - assert_eq!(get_block_source_info(&hrule), source_info); - } - - #[test] - fn test_get_block_source_info_table() { - use crate::pandoc::Block; - use crate::pandoc::attr::AttrSourceInfo; - use crate::pandoc::caption::Caption; - use crate::pandoc::table::{Table, TableFoot, TableHead}; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(11), - start_offset: 100, - end_offset: 200, - }; - let table = Block::Table(Table { - attr: (String::new(), vec![], LinkedHashMap::new()), - caption: Caption { - short: None, - long: None, - source_info: source_info.clone(), - }, - colspec: vec![], - head: TableHead { - attr: (String::new(), vec![], LinkedHashMap::new()), - rows: vec![], - source_info: source_info.clone(), - attr_source: AttrSourceInfo::empty(), - }, - bodies: vec![], - foot: TableFoot { - attr: (String::new(), vec![], LinkedHashMap::new()), - rows: vec![], - source_info: 
source_info.clone(), - attr_source: AttrSourceInfo::empty(), - }, - source_info: source_info.clone(), - attr_source: AttrSourceInfo::empty(), - }); - assert_eq!(get_block_source_info(&table), source_info); - } - - #[test] - fn test_get_block_source_info_figure() { - use crate::pandoc::Block; - use crate::pandoc::attr::AttrSourceInfo; - use crate::pandoc::block::Figure; - use crate::pandoc::caption::Caption; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(12), - start_offset: 200, - end_offset: 300, - }; - let figure = Block::Figure(Figure { - attr: (String::new(), vec![], LinkedHashMap::new()), - caption: Caption { - short: None, - long: None, - source_info: source_info.clone(), - }, - content: vec![], - source_info: source_info.clone(), - attr_source: AttrSourceInfo::empty(), - }); - assert_eq!(get_block_source_info(&figure), source_info); - } - - #[test] - fn test_get_block_source_info_div() { - use crate::pandoc::Block; - use crate::pandoc::attr::AttrSourceInfo; - use crate::pandoc::block::Div; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(13), - start_offset: 300, - end_offset: 400, - }; - let div = Block::Div(Div { - attr: (String::new(), vec![], LinkedHashMap::new()), - content: vec![], - source_info: source_info.clone(), - attr_source: AttrSourceInfo::empty(), - }); - assert_eq!(get_block_source_info(&div), source_info); - } - - #[test] - fn test_get_block_source_info_blockmetadata() { - use crate::pandoc::Block; - use crate::pandoc::block::MetaBlock; - use crate::pandoc::config_value::ConfigValue; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(14), - start_offset: 400, - end_offset: 500, - }; - let metablock = Block::BlockMetadata(MetaBlock { - meta: ConfigValue::null(source_info.clone()), - source_info: source_info.clone(), - }); - 
assert_eq!(get_block_source_info(&metablock), source_info); - } - - #[test] - fn test_get_block_source_info_notedefinitionpara() { - use crate::pandoc::Block; - use crate::pandoc::block::NoteDefinitionPara; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(15), - start_offset: 500, - end_offset: 600, - }; - let notedefpara = Block::NoteDefinitionPara(NoteDefinitionPara { - id: "note1".to_string(), - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_block_source_info(¬edefpara), source_info); - } - - #[test] - fn test_get_block_source_info_notedefinitionfencedblock() { - use crate::pandoc::Block; - use crate::pandoc::block::NoteDefinitionFencedBlock; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(16), - start_offset: 600, - end_offset: 700, - }; - let notedeffenced = Block::NoteDefinitionFencedBlock(NoteDefinitionFencedBlock { - id: "note2".to_string(), - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_block_source_info(¬edeffenced), source_info); - } - - #[test] - fn test_get_block_source_info_captionblock() { - use crate::pandoc::Block; - use crate::pandoc::block::CaptionBlock; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(17), - start_offset: 700, - end_offset: 800, - }; - let captionblock = Block::CaptionBlock(CaptionBlock { - content: vec![], - source_info: source_info.clone(), - }); - assert_eq!(get_block_source_info(&captionblock), source_info); - } - - #[test] - fn test_get_block_source_info_custom() { - use crate::pandoc::Block; - use crate::pandoc::custom::CustomNode; - use hashlink::LinkedHashMap; - use quarto_source_map::FileId; - - let source_info = SourceInfo::Original { - file_id: FileId(18), - start_offset: 800, - end_offset: 900, - }; - let custom = Block::Custom(CustomNode { - type_name: "callout".to_string(), - slots: LinkedHashMap::new(), - 
plain_data: serde_json::Value::Null, - attr: (String::new(), vec![], LinkedHashMap::new()), - source_info: source_info.clone(), - }); - assert_eq!(get_block_source_info(&custom), source_info); - } - - // ========================================================================= - // Tests for error paths and edge cases - // ========================================================================= - - #[test] - fn test_source_info_from_lua_table_unknown_type_error() { - let lua = Lua::new(); - let table = lua.create_table().unwrap(); - table.set("t", "Unknown").unwrap(); - - let result = source_info_from_lua_table(&table); - assert!(result.is_err()); - let err = result.unwrap_err(); - assert!(err.to_string().contains("Unknown SourceInfo type")); - } - - #[test] - fn test_extract_source_info_non_userdata_returns_none() { - let lua = Lua::new(); - register_quarto_namespace(&lua).unwrap(); - - // Pass a non-userdata value (a table) as the element argument - lua.load( - r#" - local t = {} - quarto.warn("Test warning", t) - "#, - ) - .set_name("@test.lua") - .exec() - .unwrap(); - - // Should still work, falling back to stack location - let diagnostics = extract_lua_diagnostics(&lua).unwrap(); - assert_eq!(diagnostics.len(), 1); - // Should have FilterProvenance since the element wasn't recognized - match &diagnostics[0].location { - Some(SourceInfo::FilterProvenance { .. 
}) => {} - other => panic!( - "Expected FilterProvenance for non-userdata element, got {:?}", - other + source_info: quarto_source_map::SourceInfo::original( + quarto_source_map::FileId(0), + 10, + 20, ), - } - } - - #[test] - fn test_quarto_warn_with_shortcode_element_falls_back_to_stack() { - use crate::pandoc::Inline; - use quarto_pandoc_types::shortcode::Shortcode; - use std::collections::HashMap; - - let lua = Lua::new(); - register_quarto_namespace(&lua).unwrap(); - - // Shortcode has source_info but get_inline_source_info returns None for it, - // so this test verifies fallback to stack location - let shortcode = Inline::Shortcode(Shortcode { - is_escaped: false, - name: "test".to_string(), - positional_args: vec![], - keyword_args: HashMap::new(), - source_info: quarto_source_map::SourceInfo::default(), }); let lua_inline = LuaInline::new(shortcode); lua.globals() @@ -1807,13 +841,13 @@ mod tests { let diagnostics = extract_lua_diagnostics(&lua).unwrap(); assert_eq!(diagnostics.len(), 1); - // Should fall back to FilterProvenance since Shortcode returns None for source_info + // Shortcode's own source_info is now used (not fallback to stack) match &diagnostics[0].location { - Some(SourceInfo::FilterProvenance { filter_path, .. }) => { - assert!(filter_path.contains("shortcode_filter.lua")); + Some(SourceInfo::Original { file_id, .. 
}) => { + assert_eq!(*file_id, quarto_source_map::FileId(0)); } other => panic!( - "Expected FilterProvenance for Shortcode element, got {:?}", + "Expected Original source_info for Shortcode element, got {:?}", other ), } diff --git a/crates/pampa/src/lua/filter.rs b/crates/pampa/src/lua/filter.rs index 5c08df7ff..234af9d36 100644 --- a/crates/pampa/src/lua/filter.rs +++ b/crates/pampa/src/lua/filter.rs @@ -688,7 +688,7 @@ fn inline_tag(inline: &Inline) -> &'static str { Inline::Span(_) => "Span", Inline::Shortcode(_) => "Shortcode", Inline::NoteReference(_) => "NoteReference", - Inline::Attr(_, _) => "Attr", + Inline::Attr(_) => "Attr", Inline::Insert(_) => "Insert", Inline::Delete(_) => "Delete", Inline::Highlight(_) => "Highlight", @@ -969,7 +969,7 @@ fn walk_inline_children_for_element_filters<'a>( | Inline::Cite(_) | Inline::Shortcode(_) | Inline::NoteReference(_) - | Inline::Attr(_, _) + | Inline::Attr(_) | Inline::Insert(_) | Inline::Delete(_) | Inline::Highlight(_) @@ -1711,7 +1711,7 @@ fn walk_inline_children_topdown<'a>( | Inline::Cite(_) | Inline::Shortcode(_) | Inline::NoteReference(_) - | Inline::Attr(_, _) + | Inline::Attr(_) | Inline::Custom(_) => Ok(inline.clone()), } }) @@ -2046,7 +2046,7 @@ mod unit_tests { fn test_inline_tag_all_variants() { use crate::pandoc::custom::CustomNode; use crate::pandoc::inline::*; - use crate::pandoc::{AttrSourceInfo, Inline, TargetSourceInfo}; + use crate::pandoc::{AttrSourceInfo, Inline, InlineAttr, TargetSourceInfo}; use hashlink::LinkedHashMap; use quarto_source_map::SourceInfo; use std::collections::HashMap; @@ -2226,10 +2226,10 @@ mod unit_tests { "NoteReference" ); assert_eq!( - inline_tag(&Inline::Attr( + inline_tag(&Inline::Attr(InlineAttr::new( (String::new(), vec![], LinkedHashMap::new()), AttrSourceInfo::empty() - )), + ))), "Attr" ); assert_eq!( diff --git a/crates/pampa/src/lua/types.rs b/crates/pampa/src/lua/types.rs index 2a099982b..f94d4044a 100644 --- a/crates/pampa/src/lua/types.rs +++ 
b/crates/pampa/src/lua/types.rs @@ -80,7 +80,7 @@ impl LuaInline { Inline::Span(_) => "Span", Inline::Shortcode(_) => "Shortcode", Inline::NoteReference(_) => "NoteReference", - Inline::Attr(_, _) => "Attr", + Inline::Attr(_) => "Attr", Inline::Insert(_) => "Insert", Inline::Delete(_) => "Delete", Inline::Highlight(_) => "Highlight", @@ -166,7 +166,7 @@ impl LuaInline { "walk", ], Inline::NoteReference(_) => &["tag", "id", "clone", "walk"], - Inline::Shortcode(_) | Inline::Attr(_, _) => &["tag", "clone", "walk"], + Inline::Shortcode(_) | Inline::Attr(_) => &["tag", "clone", "walk"], // Custom nodes are not exposed to Lua filters yet Inline::Custom(_) => &["tag", "clone"], } @@ -2855,10 +2855,10 @@ mod tests { #[test] fn test_lua_inline_tag_name_attr() { - let inline = Inline::Attr( + let inline = Inline::Attr(crate::pandoc::inline::InlineAttr::new( (String::new(), vec![], hashlink::LinkedHashMap::new()), attr_si(), - ); + )); assert_eq!(LuaInline::new(inline).tag_name(), "Attr"); } diff --git a/crates/pampa/src/lua/utils.rs b/crates/pampa/src/lua/utils.rs index 8e480b07d..0425f0e29 100644 --- a/crates/pampa/src/lua/utils.rs +++ b/crates/pampa/src/lua/utils.rs @@ -327,7 +327,7 @@ fn get_inline_type_name(inline: &Inline) -> String { Inline::Cite(_) => "Cite".to_string(), Inline::Shortcode(_) => "Shortcode".to_string(), Inline::NoteReference(_) => "NoteReference".to_string(), - Inline::Attr(_, _) => "Attr".to_string(), + Inline::Attr(_) => "Attr".to_string(), Inline::Insert(_) => "Insert".to_string(), Inline::Delete(_) => "Delete".to_string(), Inline::Highlight(_) => "Highlight".to_string(), @@ -794,7 +794,7 @@ fn stringify_inline(inline: &Inline) -> String { // Additional inline types Inline::Shortcode(_) => String::new(), Inline::NoteReference(_) => String::new(), - Inline::Attr(_, _) => String::new(), + Inline::Attr(_) => String::new(), Inline::Insert(i) => stringify_inlines(&i.content), Inline::Delete(d) => stringify_inlines(&d.content), Inline::Highlight(h) => 
stringify_inlines(&h.content), diff --git a/crates/pampa/src/pandoc/treesitter.rs b/crates/pampa/src/pandoc/treesitter.rs index 6b930d981..b014e8c7d 100644 --- a/crates/pampa/src/pandoc/treesitter.rs +++ b/crates/pampa/src/pandoc/treesitter.rs @@ -47,8 +47,8 @@ use crate::pandoc::ast_context::ASTContext; use crate::pandoc::attr::AttrSourceInfo; use crate::pandoc::block::{Block, Blocks, BulletList, OrderedList, Paragraph, Plain, RawBlock}; use crate::pandoc::inline::{ - Emph, Inline, LineBreak, Link, Math, MathType, Note, NoteReference, QuoteType, RawInline, - SoftBreak, Space, Str, Strikeout, Strong, Subscript, Superscript, + Emph, Inline, InlineAttr, LineBreak, Link, Math, MathType, Note, NoteReference, QuoteType, + RawInline, SoftBreak, Space, Str, Strikeout, Strong, Subscript, Superscript, }; use crate::pandoc::list::{ListAttributes, ListNumberDelim, ListNumberStyle}; use crate::pandoc::location::{node_location, node_source_info_with_context}; @@ -72,30 +72,6 @@ fn parse_anchor_shorthand(text: &str) -> Option<&str> { Some(inner) } -fn get_block_source_info(block: &Block) -> &quarto_source_map::SourceInfo { - match block { - Block::Plain(b) => &b.source_info, - Block::Paragraph(b) => &b.source_info, - Block::LineBlock(b) => &b.source_info, - Block::CodeBlock(b) => &b.source_info, - Block::RawBlock(b) => &b.source_info, - Block::BlockQuote(b) => &b.source_info, - Block::OrderedList(b) => &b.source_info, - Block::BulletList(b) => &b.source_info, - Block::DefinitionList(b) => &b.source_info, - Block::Header(b) => &b.source_info, - Block::HorizontalRule(b) => &b.source_info, - Block::Table(b) => &b.source_info, - Block::Figure(b) => &b.source_info, - Block::Div(b) => &b.source_info, - Block::BlockMetadata(b) => &b.source_info, - Block::NoteDefinitionPara(b) => &b.source_info, - Block::NoteDefinitionFencedBlock(b) => &b.source_info, - Block::CaptionBlock(b) => &b.source_info, - Block::Custom(b) => &b.source_info, - } -} - fn process_list( node: &tree_sitter::Node, 
children: Vec<(String, PandocNativeIntermediate)>, @@ -175,7 +151,7 @@ fn process_list( // but we do it in case we want to use it later last_para_end_row = None; last_item_end_row = blocks.last().and_then(|b| { - let source_info = get_block_source_info(b); + let source_info = b.source_info(); source_info .map_offset(source_info.length(), &context.source_context) .map(|mapped| mapped.location.row) @@ -220,7 +196,7 @@ fn process_list( last_para_end_row = None; } last_item_end_row = blocks.last().and_then(|b| { - let source_info = get_block_source_info(b); + let source_info = b.source_info(); source_info .map_offset(source_info.length(), &context.source_context) .map(|mapped| mapped.location.row) @@ -427,7 +403,7 @@ fn process_native_inline( // see tests/cursed/002.qmd for why this cannot be parsed directly in // the block grammar. PandocNativeIntermediate::IntermediateAttr(attr, attr_source) => { - Inline::Attr(attr, attr_source) + Inline::Attr(InlineAttr::new(attr, attr_source)) } PandocNativeIntermediate::IntermediateUnknown(range) => { writeln!( diff --git a/crates/pampa/src/pandoc/treesitter_utils/caption.rs b/crates/pampa/src/pandoc/treesitter_utils/caption.rs index 450e4d30b..1683d7bf3 100644 --- a/crates/pampa/src/pandoc/treesitter_utils/caption.rs +++ b/crates/pampa/src/pandoc/treesitter_utils/caption.rs @@ -46,7 +46,9 @@ pub fn process_caption( // If we found an attribute, append it as Inline::Attr if let Some((attr, attr_source)) = caption_attr { - caption_inlines.push(crate::pandoc::inline::Inline::Attr(attr, attr_source)); + caption_inlines.push(crate::pandoc::inline::Inline::Attr( + crate::pandoc::inline::InlineAttr::new(attr, attr_source), + )); } PandocNativeIntermediate::IntermediateBlock(Block::CaptionBlock(CaptionBlock { diff --git a/crates/pampa/src/pandoc/treesitter_utils/paragraph.rs b/crates/pampa/src/pandoc/treesitter_utils/paragraph.rs index d22f4a078..ef5182878 100644 --- a/crates/pampa/src/pandoc/treesitter_utils/paragraph.rs +++ 
b/crates/pampa/src/pandoc/treesitter_utils/paragraph.rs @@ -5,7 +5,7 @@ use crate::pandoc::ast_context::ASTContext; use crate::pandoc::block::{Block, Paragraph}; -use crate::pandoc::inline::Inline; +use crate::pandoc::inline::{Inline, InlineAttr}; use crate::pandoc::location::node_source_info_with_context; use crate::pandoc::treesitter_utils::pandocnativeintermediate::PandocNativeIntermediate; @@ -27,7 +27,7 @@ pub fn process_paragraph( } else if let PandocNativeIntermediate::IntermediateAttr(attr, attr_source) = child { // Attributes can appear in paragraphs (e.g., after math expressions) // They will be processed by postprocess.rs to create Spans - inlines.push(Inline::Attr(attr, attr_source)); + inlines.push(Inline::Attr(InlineAttr::new(attr, attr_source))); } } PandocNativeIntermediate::IntermediateBlock(Block::Paragraph(Paragraph { diff --git a/crates/pampa/src/pandoc/treesitter_utils/pipe_table.rs b/crates/pampa/src/pandoc/treesitter_utils/pipe_table.rs index ec5b58e4a..391a5d602 100644 --- a/crates/pampa/src/pandoc/treesitter_utils/pipe_table.rs +++ b/crates/pampa/src/pandoc/treesitter_utils/pipe_table.rs @@ -188,13 +188,9 @@ pub fn process_pipe_table( caption_source_info = Some(caption_block.source_info.clone()); // Extract Inline::Attr if present at the end (for soft-break captions) - if let Some(crate::pandoc::inline::Inline::Attr( - caption_attr, - caption_attr_source, - )) = inlines.last() - { - attr = caption_attr.clone(); - attr_source = caption_attr_source.clone(); + if let Some(crate::pandoc::inline::Inline::Attr(inline_attr)) = inlines.last() { + attr = inline_attr.attr.clone(); + attr_source = inline_attr.attr_source.clone(); inlines.pop(); // Trim trailing space before the attribute diff --git a/crates/pampa/src/pandoc/treesitter_utils/postprocess.rs b/crates/pampa/src/pandoc/treesitter_utils/postprocess.rs index 5e2198514..65ef19376 100644 --- a/crates/pampa/src/pandoc/treesitter_utils/postprocess.rs +++ 
b/crates/pampa/src/pandoc/treesitter_utils/postprocess.rs @@ -99,7 +99,7 @@ fn validate_list_table_div(div: &Div) -> ListTableValidation { let Block::BulletList(rows_list) = last_block else { return ListTableValidation::Invalid { reason: "list-table div's last block must be a bullet list (the rows)".to_string(), - location: get_block_source_info(last_block), + location: last_block.source_info().clone(), }; }; @@ -110,7 +110,7 @@ fn validate_list_table_div(div: &Div) -> ListTableValidation { let location = if row_blocks.is_empty() { rows_list.source_info.clone() } else { - get_block_source_info(&row_blocks[0]) + row_blocks[0].source_info().clone() }; return ListTableValidation::Invalid { reason: format!( @@ -129,7 +129,7 @@ fn validate_list_table_div(div: &Div) -> ListTableValidation { "row {} in list-table must contain a bullet list of cells", row_idx + 1 ), - location: get_block_source_info(&row_blocks[0]), + location: row_blocks[0].source_info().clone(), }; }; } @@ -137,31 +137,6 @@ fn validate_list_table_div(div: &Div) -> ListTableValidation { ListTableValidation::Valid } -/// Helper to get the source info from a Block -fn get_block_source_info(block: &Block) -> SourceInfo { - match block { - Block::Plain(b) => b.source_info.clone(), - Block::Paragraph(b) => b.source_info.clone(), - Block::LineBlock(b) => b.source_info.clone(), - Block::CodeBlock(b) => b.source_info.clone(), - Block::RawBlock(b) => b.source_info.clone(), - Block::BlockQuote(b) => b.source_info.clone(), - Block::OrderedList(b) => b.source_info.clone(), - Block::BulletList(b) => b.source_info.clone(), - Block::DefinitionList(b) => b.source_info.clone(), - Block::Header(b) => b.source_info.clone(), - Block::HorizontalRule(b) => b.source_info.clone(), - Block::Table(b) => b.source_info.clone(), - Block::Figure(b) => b.source_info.clone(), - Block::Div(b) => b.source_info.clone(), - Block::BlockMetadata(b) => b.source_info.clone(), - Block::CaptionBlock(b) => b.source_info.clone(), - 
Block::NoteDefinitionPara(b) => b.source_info.clone(), - Block::NoteDefinitionFencedBlock(b) => b.source_info.clone(), - Block::Custom(b) => b.source_info.clone(), - } -} - /// Parse alignment string ("l,c,r,d") into a vector of Alignment fn parse_alignments(aligns_str: &str) -> Vec { aligns_str @@ -402,7 +377,7 @@ fn transform_list_table_div(div: Div) -> Block { let cell_source_info = if cell_blocks.is_empty() { row_source_info.clone() } else { - get_block_source_info(&cell_blocks[0]) + cell_blocks[0].source_info().clone() }; cells.push(Cell { @@ -869,7 +844,7 @@ pub fn postprocess(doc: Pandoc, error_collector: &mut DiagnosticCollector) -> Re let is_last_attr = header .content .last() - .is_some_and(|v| matches!(v, Inline::Attr(_, _))); + .is_some_and(|v| matches!(v, Inline::Attr(_))); if !is_last_attr { let mut attr = header.attr.clone(); if attr.0.is_empty() { @@ -897,11 +872,11 @@ pub fn postprocess(doc: Pandoc, error_collector: &mut DiagnosticCollector) -> Re Unchanged(header) } } else { - let Some(Inline::Attr(attr, attr_source)) = header.content.pop() else { + let Some(Inline::Attr(inline_attr)) = header.content.pop() else { panic!("shouldn't happen, header should have an attribute at this point"); }; - header.attr = attr; - header.attr_source = attr_source; + header.attr = inline_attr.attr; + header.attr_source = inline_attr.attr_source; header.content = trim_inlines(header.content).0; FilterResult(vec![Block::Header(header)], true) } @@ -1267,22 +1242,28 @@ pub fn postprocess(doc: Pandoc, error_collector: &mut DiagnosticCollector) -> Re let attr_idx = if has_space { i + 2 } else { i + 1 }; if attr_idx < break_cleaned.len() - && let Inline::Attr(attr, attr_source) = &break_cleaned[attr_idx] + && let Inline::Attr(inline_attr) = &break_cleaned[attr_idx] { // Found Math + (Space?) 
+ Attr pattern // Wrap Math in a Span with the attribute let mut classes = vec!["quarto-math-with-attribute".to_string()]; - classes.extend(attr.1.clone()); + classes.extend(inline_attr.attr.1.clone()); math_processed.push(Inline::Span(Span { - attr: (attr.0.clone(), classes, attr.2.clone()), + attr: ( + inline_attr.attr.0.clone(), + classes, + inline_attr.attr.2.clone(), + ), content: vec![Inline::Math(math.clone())], - source_info: if let Some(attr_overall) = attr_source.combine_all() { + source_info: if let Some(attr_overall) = + inline_attr.attr_source.combine_all() + { math.source_info.combine(&attr_overall) } else { math.source_info.clone() }, - attr_source: attr_source.clone(), + attr_source: inline_attr.attr_source.clone(), })); // Skip the Math, optional Space, and Attr @@ -1481,9 +1462,9 @@ pub fn postprocess(doc: Pandoc, error_collector: &mut DiagnosticCollector) -> Re crate::pandoc::attr::AttrSourceInfo, > = None; - if let Some(Inline::Attr(attr, attr_source)) = caption_content.last() { - caption_attr = Some(attr.clone()); - caption_attr_source = Some(attr_source.clone()); + if let Some(Inline::Attr(inline_attr)) = caption_content.last() { + caption_attr = Some(inline_attr.attr.clone()); + caption_attr_source = Some(inline_attr.attr_source.clone()); caption_content.pop(); // Remove the Attr from caption content // Trim trailing space before the attribute diff --git a/crates/pampa/src/pandoc/treesitter_utils/section.rs b/crates/pampa/src/pandoc/treesitter_utils/section.rs index 1e7ad7dfb..21f6b4e2b 100644 --- a/crates/pampa/src/pandoc/treesitter_utils/section.rs +++ b/crates/pampa/src/pandoc/treesitter_utils/section.rs @@ -65,11 +65,11 @@ pub fn process_section( caption_source_info = caption_block.source_info.clone(); // Extract Inline::Attr if present at the end - if let Some(crate::pandoc::inline::Inline::Attr(attr, attr_source)) = + if let Some(crate::pandoc::inline::Inline::Attr(inline_attr)) = caption_inlines.last() { - caption_attr = 
Some(attr.clone()); - caption_attr_source = Some(attr_source.clone()); + caption_attr = Some(inline_attr.attr.clone()); + caption_attr_source = Some(inline_attr.attr_source.clone()); caption_inlines.pop(); // Trim trailing space before the attribute diff --git a/crates/pampa/src/toc.rs b/crates/pampa/src/toc.rs index a6bb0fce8..c75dd1e27 100644 --- a/crates/pampa/src/toc.rs +++ b/crates/pampa/src/toc.rs @@ -431,7 +431,7 @@ fn inlines_to_text(inlines: &[Inline]) -> String { Inline::Math(m) => text.push_str(&m.text), Inline::RawInline(_) => {} // Skip raw content Inline::Shortcode(_) => {} // Skip shortcodes - Inline::Attr(_, _) => {} // Skip attribute nodes + Inline::Attr(_) => {} // Skip attribute nodes Inline::Insert(i) => text.push_str(&inlines_to_text(&i.content)), Inline::Delete(_) => {} // Skip deleted content Inline::Highlight(h) => text.push_str(&inlines_to_text(&h.content)), diff --git a/crates/pampa/src/writers/ansi.rs b/crates/pampa/src/writers/ansi.rs index 7260db646..83d64008b 100644 --- a/crates/pampa/src/writers/ansi.rs +++ b/crates/pampa/src/writers/ansi.rs @@ -1329,7 +1329,7 @@ fn write_inline( Inline::NoteReference(_) => { // Ignore note references } - Inline::Attr(_, _) => { + Inline::Attr(_) => { // Ignore standalone attributes } Inline::Insert(insert) => { diff --git a/crates/pampa/src/writers/html.rs b/crates/pampa/src/writers/html.rs index 850001d7b..f7b8cf44e 100644 --- a/crates/pampa/src/writers/html.rs +++ b/crates/pampa/src/writers/html.rs @@ -858,7 +858,7 @@ fn write_inline( write!(ctx, "")?; } // Quarto extensions - render as raw HTML or skip - Inline::Shortcode(_) | Inline::NoteReference(_) | Inline::Attr(_, _) => { + Inline::Shortcode(_) | Inline::NoteReference(_) | Inline::Attr(_) => { // These should not appear in final output } Inline::Insert(ins) => { diff --git a/crates/pampa/src/writers/html_source.rs b/crates/pampa/src/writers/html_source.rs index ee6b437eb..6f0e7aea8 100644 --- a/crates/pampa/src/writers/html_source.rs +++ 
b/crates/pampa/src/writers/html_source.rs @@ -404,7 +404,7 @@ fn walk_inline( } } } - Inline::Shortcode(_) | Inline::NoteReference(_) | Inline::Attr(_, _) => { + Inline::Shortcode(_) | Inline::NoteReference(_) | Inline::Attr(_) => { // Quarto extensions - no children to walk } Inline::Insert(ins) => { diff --git a/crates/pampa/src/writers/incremental.rs b/crates/pampa/src/writers/incremental.rs index b3b13ae06..69486e286 100644 --- a/crates/pampa/src/writers/incremental.rs +++ b/crates/pampa/src/writers/incremental.rs @@ -16,7 +16,6 @@ use quarto_ast_reconcile::types::{ }; use quarto_ast_reconcile::{structural_eq_blocks, structural_eq_inlines}; use quarto_pandoc_types::config_value::{ConfigMapEntry, ConfigValue, ConfigValueKind}; -use quarto_source_map::SourceInfo; use std::ops::Range; use super::qmd; @@ -447,35 +446,10 @@ fn ensure_trailing_newline<'a>( /// Extract the byte range (start..end) from a Block's source_info. fn block_source_span(block: &Block) -> Range { - let si = block_source_info(block); + let si = block.source_info(); si.start_offset()..si.end_offset() } -/// Extract the SourceInfo from a Block. 
-fn block_source_info(block: &Block) -> &SourceInfo { - match block { - Block::Paragraph(p) => &p.source_info, - Block::Header(h) => &h.source_info, - Block::CodeBlock(cb) => &cb.source_info, - Block::BlockQuote(bq) => &bq.source_info, - Block::BulletList(bl) => &bl.source_info, - Block::OrderedList(ol) => &ol.source_info, - Block::Div(d) => &d.source_info, - Block::HorizontalRule(hr) => &hr.source_info, - Block::Table(t) => &t.source_info, - Block::RawBlock(rb) => &rb.source_info, - Block::Plain(p) => &p.source_info, - Block::LineBlock(lb) => &lb.source_info, - Block::DefinitionList(dl) => &dl.source_info, - Block::Figure(f) => &f.source_info, - Block::BlockMetadata(m) => &m.source_info, - Block::NoteDefinitionPara(nd) => &nd.source_info, - Block::NoteDefinitionFencedBlock(nd) => &nd.source_info, - Block::CaptionBlock(cb) => &cb.source_info, - Block::Custom(cn) => &cn.source_info, - } -} - /// Write a single block to a string using the standard QMD writer. fn write_block_to_string( block: &Block, @@ -817,59 +791,15 @@ pub fn inline_children(inline: &Inline) -> &[Inline] { | Inline::RawInline(_) | Inline::Shortcode(_) | Inline::NoteReference(_) - | Inline::Attr(_, _) + | Inline::Attr(_) | Inline::Note(_) // Note contains Blocks, not Inlines | Inline::Custom(_) => &[], } } -/// Extract the SourceInfo from an Inline. 
-pub fn inline_source_info(inline: &Inline) -> &SourceInfo { - match inline { - Inline::Str(s) => &s.source_info, - Inline::Emph(e) => &e.source_info, - Inline::Strong(s) => &s.source_info, - Inline::Underline(u) => &u.source_info, - Inline::Strikeout(s) => &s.source_info, - Inline::Superscript(s) => &s.source_info, - Inline::Subscript(s) => &s.source_info, - Inline::SmallCaps(s) => &s.source_info, - Inline::Quoted(q) => &q.source_info, - Inline::Cite(c) => &c.source_info, - Inline::Code(c) => &c.source_info, - Inline::Space(s) => &s.source_info, - Inline::SoftBreak(s) => &s.source_info, - Inline::LineBreak(l) => &l.source_info, - Inline::Math(m) => &m.source_info, - Inline::RawInline(r) => &r.source_info, - Inline::Link(l) => &l.source_info, - Inline::Image(i) => &i.source_info, - Inline::Note(n) => &n.source_info, - Inline::Span(s) => &s.source_info, - Inline::Shortcode(sc) => &sc.source_info, - Inline::NoteReference(nr) => &nr.source_info, - Inline::Attr(_, attr_si) => { - // Attr inlines don't have a single source_info like other inlines. - // Use the id source if available, otherwise return a static default. - if let Some(ref id_si) = attr_si.id { - id_si - } else { - static DUMMY: std::sync::LazyLock = - std::sync::LazyLock::new(SourceInfo::default); - &DUMMY - } - } - Inline::Insert(i) => &i.source_info, - Inline::Delete(d) => &d.source_info, - Inline::Highlight(h) => &h.source_info, - Inline::EditComment(e) => &e.source_info, - Inline::Custom(c) => &c.source_info, - } -} - /// Extract the byte range (start..end) from an Inline's source_info. 
pub fn inline_source_span(inline: &Inline) -> Range { - let si = inline_source_info(inline); + let si = inline.source_info(); si.start_offset()..si.end_offset() } diff --git a/crates/pampa/src/writers/json.rs b/crates/pampa/src/writers/json.rs index 6293d504c..3437c23d6 100644 --- a/crates/pampa/src/writers/json.rs +++ b/crates/pampa/src/writers/json.rs @@ -763,12 +763,12 @@ fn write_inline(inline: &Inline, ctx: &mut JsonWriterContext) -> Value { let attr = (String::new(), vec!["footnote-ref".to_string()], attr_hash); node_with_source("Span", Some(json!([write_attr(&attr), []])), ¬e_ref.source_info, ctx) } - Inline::Attr(_attr, attr_source) => { + Inline::Attr(inline_attr) => { // Defensive: Standalone attributes should not reach JSON writer ctx.errors.push( DiagnosticMessageBuilder::error("Standalone attribute not supported in JSON format") .with_code("Q-3-32") - .with_location(attr_source.id.clone().unwrap_or_default()) + .with_location(inline_attr.attr_source.id.clone().unwrap_or_default()) .problem("Cannot render standalone attributes in JSON format") .add_detail("Standalone attributes should be attached to elements during parsing") .add_hint("This may indicate a parsing issue or unsupported syntax") @@ -2617,13 +2617,13 @@ fn stream_write_inline( }, ) } - Inline::Attr(_attr, attr_source) => { + Inline::Attr(inline_attr) => { ctx.errors.push( DiagnosticMessageBuilder::error( "Standalone attribute not supported in JSON format", ) .with_code("Q-3-32") - .with_location(attr_source.id.clone().unwrap_or_default()) + .with_location(inline_attr.attr_source.id.clone().unwrap_or_default()) .problem("Cannot render standalone attributes in JSON format") .add_detail("Standalone attributes should be attached to elements during parsing") .add_hint("This may indicate a parsing issue or unsupported syntax") diff --git a/crates/pampa/src/writers/native.rs b/crates/pampa/src/writers/native.rs index f99be91f9..64d80b201 100644 --- a/crates/pampa/src/writers/native.rs +++ 
b/crates/pampa/src/writers/native.rs @@ -381,7 +381,7 @@ fn write_inline( ); // Skip this inline } - Inline::Attr(_attr, attr_source) => { + Inline::Attr(inline_attr) => { // Extension error - standalone attributes not supported in native format let mut builder = quarto_error_reporting::DiagnosticMessageBuilder::error( "Standalone attributes not supported in native format", @@ -390,7 +390,7 @@ fn write_inline( .problem("Cannot render standalone attribute in native format"); // Add location if available from attr id - if let Some(ref source_info) = attr_source.id { + if let Some(ref source_info) = inline_attr.attr_source.id { builder = builder.with_location(source_info.clone()); } diff --git a/crates/pampa/src/writers/plaintext.rs b/crates/pampa/src/writers/plaintext.rs index c90027f86..f2174ec60 100644 --- a/crates/pampa/src/writers/plaintext.rs +++ b/crates/pampa/src/writers/plaintext.rs @@ -168,7 +168,7 @@ fn write_inline( Inline::NoteReference(NoteReference { source_info, .. }) => { ctx.warn_dropped_node("NoteReference", source_info); } - Inline::Attr(_, _) => { + Inline::Attr(_) => { // Attr uses AttrSourceInfo, not SourceInfo, so we drop silently } Inline::Custom(custom) => { @@ -885,14 +885,14 @@ mod tests { fn test_attr_inline_silently_dropped() { let inlines = vec![ make_str("text"), - Inline::Attr( + Inline::Attr(crate::pandoc::inline::InlineAttr::new( ( "id".to_string(), vec!["class".to_string()], hashlink::LinkedHashMap::new(), ), crate::pandoc::attr::AttrSourceInfo::empty(), - ), + )), ]; let (result, diags) = inlines_to_string(&inlines); // Attr is dropped silently (uses AttrSourceInfo, not SourceInfo) diff --git a/crates/pampa/src/writers/qmd.rs b/crates/pampa/src/writers/qmd.rs index ffecd914b..345f726fd 100644 --- a/crates/pampa/src/writers/qmd.rs +++ b/crates/pampa/src/writers/qmd.rs @@ -1792,7 +1792,7 @@ fn write_inline( crate::pandoc::Inline::Delete(node) => write_delete(node, buf, ctx), crate::pandoc::Inline::Insert(node) => write_insert(node, 
buf, ctx), crate::pandoc::Inline::Shortcode(node) => write_shortcode(node, buf, ctx), - crate::pandoc::Inline::Attr(node, _) => write_attr(node, buf, ctx), + crate::pandoc::Inline::Attr(node) => write_attr(&node.attr, buf, ctx), crate::pandoc::Inline::NoteReference(node) => write_notereference(node, buf, ctx), crate::pandoc::Inline::Note(node) => write_note(node, buf, ctx), crate::pandoc::Inline::RawInline(node) => write_rawinline(node, buf, ctx), @@ -2045,3 +2045,68 @@ fn write_impl( } Ok(()) } + +/// Serialize a Pandoc AST to QMD bytes and return source provenance. +/// +/// The returned `SourceInfo::Concat` maps byte ranges in the output to the +/// `source_info` of the AST nodes that produced them. The pieces tile the +/// entire output with no gaps: YAML frontmatter is one piece, and each +/// top-level block (including its preceding blank-line separator) is one piece. +/// +/// Use `source_info.map_offset(byte_offset, &source_context)` to resolve a +/// position in the serialized text back to the original source file and line. +pub fn write_with_source_info( + pandoc: &Pandoc, +) -> Result<(Vec, quarto_source_map::SourceInfo), Vec> +{ + let mut ctx = QmdWriterContext::new(); + let mut buf = Vec::new(); + + let source_info = match write_impl_tracked(pandoc, &mut buf, &mut ctx) { + Ok(si) => si, + Err(e) => { + return Err(vec![ + quarto_error_reporting::DiagnosticMessageBuilder::error("IO error during write") + .with_code("Q-3-1") + .problem(format!("Failed to write QMD output: {}", e)) + .build(), + ]); + } + }; + + if !ctx.errors.is_empty() { + return Err(ctx.errors); + } + + Ok((buf, source_info)) +} + +/// Like `write_impl` but tracks which AST node produced each byte range. 
+fn write_impl_tracked( + pandoc: &Pandoc, + buf: &mut Vec, + ctx: &mut QmdWriterContext, +) -> std::io::Result { + let mut pieces: Vec<(quarto_source_map::SourceInfo, usize)> = Vec::new(); + + // Track YAML frontmatter as a single piece + let meta_start = buf.len(); + let mut need_newline = write_config_value_meta(&pandoc.meta, buf, ctx)?; + let meta_len = buf.len() - meta_start; + if meta_len > 0 { + pieces.push((pandoc.meta.source_info.clone(), meta_len)); + } + + // Track each block — include preceding blank line in measurement + for block in &pandoc.blocks { + let start = buf.len(); + if need_newline { + writeln!(buf)?; + } + write_block(block, buf, ctx)?; + pieces.push((block.source_info().clone(), buf.len() - start)); + need_newline = true; + } + + Ok(quarto_source_map::SourceInfo::concat(pieces)) +} diff --git a/crates/pampa/tests/incremental_writer_investigation.rs b/crates/pampa/tests/incremental_writer_investigation.rs index 601f40f11..f3e0c5a77 100644 --- a/crates/pampa/tests/incremental_writer_investigation.rs +++ b/crates/pampa/tests/incremental_writer_investigation.rs @@ -13,7 +13,6 @@ use pampa::pandoc::Block; use pampa::readers; -use quarto_source_map::SourceInfo; // ============================================================================= // Helpers @@ -33,35 +32,10 @@ fn parse_qmd(input: &str) -> pampa::pandoc::Pandoc { /// Extract the source span (start_offset, end_offset) from a Block's source_info. fn block_span(block: &Block) -> (usize, usize) { - let si = block_source_info(block); + let si = block.source_info(); (si.start_offset(), si.end_offset()) } -/// Extract the SourceInfo from a Block. 
-fn block_source_info(block: &Block) -> &SourceInfo { - match block { - Block::Paragraph(p) => &p.source_info, - Block::Header(h) => &h.source_info, - Block::CodeBlock(cb) => &cb.source_info, - Block::BlockQuote(bq) => &bq.source_info, - Block::BulletList(bl) => &bl.source_info, - Block::OrderedList(ol) => &ol.source_info, - Block::Div(d) => &d.source_info, - Block::HorizontalRule(hr) => &hr.source_info, - Block::Table(t) => &t.source_info, - Block::RawBlock(rb) => &rb.source_info, - Block::Plain(p) => &p.source_info, - Block::LineBlock(lb) => &lb.source_info, - Block::DefinitionList(dl) => &dl.source_info, - Block::Figure(f) => &f.source_info, - Block::BlockMetadata(m) => &m.source_info, - Block::NoteDefinitionPara(nd) => &nd.source_info, - Block::NoteDefinitionFencedBlock(nd) => &nd.source_info, - Block::CaptionBlock(cb) => &cb.source_info, - Block::Custom(cn) => &cn.source_info, - } -} - /// Extract the text that a block's source span covers in the original input. #[allow(dead_code)] fn block_text<'a>(input: &'a str, block: &Block) -> &'a str { diff --git a/crates/pampa/tests/inline_span_investigation.rs b/crates/pampa/tests/inline_span_investigation.rs index cf8d4e7c1..b62361c4d 100644 --- a/crates/pampa/tests/inline_span_investigation.rs +++ b/crates/pampa/tests/inline_span_investigation.rs @@ -18,7 +18,6 @@ use pampa::pandoc::{Block, Inline}; use pampa::readers; -use quarto_source_map::SourceInfo; // ============================================================================= // Helpers @@ -38,49 +37,10 @@ fn parse_qmd(input: &str) -> pampa::pandoc::Pandoc { /// Extract source span (start, end) from an Inline's source_info. 
fn inline_span(inline: &Inline) -> (usize, usize) { - let si = inline_source_info(inline); + let si = inline.source_info(); (si.start_offset(), si.end_offset()) } -fn inline_source_info(inline: &Inline) -> &SourceInfo { - match inline { - Inline::Str(s) => &s.source_info, - Inline::Emph(e) => &e.source_info, - Inline::Strong(s) => &s.source_info, - Inline::Underline(u) => &u.source_info, - Inline::Strikeout(s) => &s.source_info, - Inline::Superscript(s) => &s.source_info, - Inline::Subscript(s) => &s.source_info, - Inline::SmallCaps(s) => &s.source_info, - Inline::Quoted(q) => &q.source_info, - Inline::Cite(c) => &c.source_info, - Inline::Code(c) => &c.source_info, - Inline::Space(s) => &s.source_info, - Inline::SoftBreak(s) => &s.source_info, - Inline::LineBreak(l) => &l.source_info, - Inline::Math(m) => &m.source_info, - Inline::RawInline(r) => &r.source_info, - Inline::Link(l) => &l.source_info, - Inline::Image(i) => &i.source_info, - Inline::Note(n) => &n.source_info, - Inline::Span(s) => &s.source_info, - Inline::Shortcode(sc) => &sc.source_info, - Inline::NoteReference(nr) => &nr.source_info, - // Attr is a special case — AttrSourceInfo doesn't have a single span. - // Use a dummy; Attr inlines are rare and won't appear in our tests. 
- Inline::Attr(_, _) => { - static DUMMY: std::sync::LazyLock = - std::sync::LazyLock::new(SourceInfo::default); - &DUMMY - } - Inline::Insert(i) => &i.source_info, - Inline::Delete(d) => &d.source_info, - Inline::Highlight(h) => &h.source_info, - Inline::EditComment(e) => &e.source_info, - Inline::Custom(c) => &c.source_info, - } -} - fn inline_type_name(inline: &Inline) -> &'static str { match inline { Inline::Str(_) => "Str", @@ -105,7 +65,7 @@ fn inline_type_name(inline: &Inline) -> &'static str { Inline::Span(_) => "Span", Inline::Shortcode(_) => "Shortcode", Inline::NoteReference(_) => "NoteReference", - Inline::Attr(_, _) => "Attr", + Inline::Attr(_) => "Attr", Inline::Insert(_) => "Insert", Inline::Delete(_) => "Delete", Inline::Highlight(_) => "Highlight", diff --git a/crates/pampa/tests/qmd_writer_source_info.rs b/crates/pampa/tests/qmd_writer_source_info.rs new file mode 100644 index 000000000..9608f2fa9 --- /dev/null +++ b/crates/pampa/tests/qmd_writer_source_info.rs @@ -0,0 +1,235 @@ +//! Tests for `write_with_source_info` — verifying that the QMD writer +//! produces a SourceInfo::Concat that tiles the entire output and maps +//! byte offsets back to the correct source locations. 
+ +use pampa::writers::qmd::write_with_source_info; +use quarto_pandoc_types::block::{CodeBlock, Header, Paragraph}; +use quarto_pandoc_types::config_value::ConfigValue; +use quarto_pandoc_types::inline::Str; +use quarto_pandoc_types::pandoc::Pandoc; +use quarto_pandoc_types::{Block, Inline}; +use quarto_source_map::{FileId, SourceContext, SourceInfo}; + +fn si(file: usize, start: usize, end: usize) -> SourceInfo { + SourceInfo::original(FileId(file), start, end) +} + +fn str_inline(text: &str, source_info: SourceInfo) -> Inline { + Inline::Str(Str { + text: text.to_string(), + source_info, + }) +} + +fn paragraph(text: &str, source_info: SourceInfo) -> Block { + Block::Paragraph(Paragraph { + content: vec![str_inline(text, source_info.clone())], + source_info, + }) +} + +fn code_block(code: &str, source_info: SourceInfo) -> Block { + Block::CodeBlock(CodeBlock { + attr: quarto_pandoc_types::attr::empty_attr(), + text: code.to_string(), + source_info, + attr_source: quarto_pandoc_types::attr::AttrSourceInfo::empty(), + }) +} + +fn header(text: &str, level: usize, source_info: SourceInfo) -> Block { + Block::Header(Header { + level, + attr: quarto_pandoc_types::attr::empty_attr(), + content: vec![str_inline(text, source_info.clone())], + source_info, + attr_source: quarto_pandoc_types::attr::AttrSourceInfo::empty(), + }) +} + +#[test] +fn concat_piece_lengths_sum_to_buffer_length() { + let pandoc = Pandoc { + meta: ConfigValue::default(), + blocks: vec![ + paragraph("Hello", si(0, 0, 5)), + paragraph("World", si(0, 6, 11)), + ], + }; + + let (buf, source_info) = write_with_source_info(&pandoc).unwrap(); + + match &source_info { + SourceInfo::Concat { pieces } => { + let total_len: usize = pieces.iter().map(|p| p.length).sum(); + assert_eq!( + total_len, + buf.len(), + "Concat pieces must tile the entire buffer. 
\ + Pieces total: {}, buffer len: {}", + total_len, + buf.len() + ); + } + _ => panic!("Expected Concat, got {:?}", source_info), + } +} + +#[test] +fn concat_covers_output_with_frontmatter() { + // Build an AST with YAML frontmatter and a block + let mut meta = ConfigValue::new_string("My Title", si(0, 4, 14)); + // Wrap in a map with key "title" + let entries = vec![quarto_pandoc_types::config_value::ConfigMapEntry { + key: "title".to_string(), + key_source: SourceInfo::default(), + value: meta, + }]; + meta = ConfigValue::new_map(entries, si(0, 0, 25)); + + let pandoc = Pandoc { + meta, + blocks: vec![paragraph("Content", si(0, 30, 37))], + }; + + let (buf, source_info) = write_with_source_info(&pandoc).unwrap(); + + match &source_info { + SourceInfo::Concat { pieces } => { + // Should have 2 pieces: frontmatter + paragraph + assert_eq!(pieces.len(), 2, "Expected 2 pieces (frontmatter + block)"); + let total_len: usize = pieces.iter().map(|p| p.length).sum(); + assert_eq!(total_len, buf.len()); + } + _ => panic!("Expected Concat, got {:?}", source_info), + } +} + +#[test] +fn blocks_from_different_files_map_correctly() { + // Simulate include expansion: blocks from two different files + let block_main = paragraph("Main content", si(0, 0, 12)); + let block_included = code_block("x = 1", si(1, 0, 5)); + + let pandoc = Pandoc { + meta: ConfigValue::default(), + blocks: vec![block_main, block_included], + }; + + let (buf, source_info) = write_with_source_info(&pandoc).unwrap(); + + // Set up a SourceContext so map_offset can resolve + let mut ctx = SourceContext::new(); + let _fid0 = ctx.add_file("main.qmd".to_string(), Some("Main content".to_string())); + let _fid1 = ctx.add_file("included.qmd".to_string(), Some("x = 1".to_string())); + + let output = String::from_utf8(buf).unwrap(); + + // Find where "x = 1" starts in the output (inside the code block) + let code_pos = output.find("x = 1").expect("code should be in output"); + + let mapped = source_info + 
.map_offset(code_pos, &ctx) + .expect("should resolve"); + assert_eq!( + mapped.file_id, + FileId(1), + "Code block offset should map to the included file (FileId(1))" + ); +} + +#[test] +fn map_offset_resolves_block_in_single_file() { + let pandoc = Pandoc { + meta: ConfigValue::default(), + blocks: vec![ + header("Title", 1, si(0, 0, 8)), + paragraph("Body text", si(0, 9, 18)), + ], + }; + + let (buf, source_info) = write_with_source_info(&pandoc).unwrap(); + + let mut ctx = SourceContext::new(); + ctx.add_file( + "test.qmd".to_string(), + Some("# Title\nBody text".to_string()), + ); + + let output = String::from_utf8(buf).unwrap(); + + // Offset in the "Body text" paragraph + let body_pos = output.find("Body text").expect("should find body"); + let mapped = source_info + .map_offset(body_pos, &ctx) + .expect("should resolve"); + assert_eq!(mapped.file_id, FileId(0)); + // The mapped offset should be within the paragraph's source range (9..18) + assert!( + mapped.location.offset >= 9 && mapped.location.offset <= 18, + "Expected offset in range 9..18, got {}", + mapped.location.offset + ); +} + +#[test] +fn no_blocks_produces_empty_or_frontmatter_only() { + // No blocks, no meta + let pandoc = Pandoc { + meta: ConfigValue::default(), + blocks: vec![], + }; + + let (buf, source_info) = write_with_source_info(&pandoc).unwrap(); + + match &source_info { + SourceInfo::Concat { pieces } => { + let total_len: usize = pieces.iter().map(|p| p.length).sum(); + assert_eq!(total_len, buf.len()); + } + _ => { + // Empty concat or default is fine if buffer is empty + assert!( + buf.is_empty(), + "Non-Concat SourceInfo but buffer is not empty" + ); + } + } +} + +#[test] +fn round_trip_code_block_offset_accuracy() { + // Parse a real file, serialize, check offset maps back approximately + let input = "---\ntitle: test\n---\n\n# Header\n\n```python\nprint('hello')\n```\n"; + + let mut stderr = Vec::new(); + let (pandoc, _ast_context, _warnings) = + 
pampa::readers::qmd::read(input.as_bytes(), false, "test.qmd", &mut stderr, true, None) + .expect("parse failed"); + + let (buf, source_info) = write_with_source_info(&pandoc).unwrap(); + + let mut ctx = SourceContext::new(); + ctx.add_file("test.qmd".to_string(), Some(input.to_string())); + + let output = String::from_utf8(buf).unwrap(); + + // Find the code content in the serialized output + if let Some(code_pos) = output.find("print('hello')") { + let mapped = source_info.map_offset(code_pos, &ctx); + assert!(mapped.is_some(), "Code block offset should be resolvable"); + let mapped = mapped.unwrap(); + assert_eq!(mapped.file_id, FileId(0)); + // The original "print('hello')" starts around offset 42 in the input + let original_pos = input.find("print('hello')").unwrap(); + // Allow some tolerance for fence formatting differences + let diff = (mapped.location.offset as isize - original_pos as isize).unsigned_abs(); + assert!( + diff <= 10, + "Mapped offset {} should be close to original {}, diff was {}", + mapped.location.offset, + original_pos, + diff + ); + } +} diff --git a/crates/pampa/tests/test_location_health.rs b/crates/pampa/tests/test_location_health.rs index bd4b6b671..6226105ab 100644 --- a/crates/pampa/tests/test_location_health.rs +++ b/crates/pampa/tests/test_location_health.rs @@ -359,7 +359,7 @@ fn collect_source_info_from_inline(inline: &Inline, source_infos: &mut Vec { source_infos.push(note_ref.source_info.clone()); } - Inline::Attr(_, _) => { + Inline::Attr(_) => { // Attr doesn't have source info - it's just metadata } Inline::Insert(insert) => { diff --git a/crates/pampa/tests/test_treesitter_coverage.rs b/crates/pampa/tests/test_treesitter_coverage.rs index b20f0553b..1cfeea670 100644 --- a/crates/pampa/tests/test_treesitter_coverage.rs +++ b/crates/pampa/tests/test_treesitter_coverage.rs @@ -24,14 +24,14 @@ fn parse_qmd(input: &str) -> pampa::pandoc::Pandoc { } // ============================================================================ 
-// List tests - exercise get_block_source_info with different block types +// List tests - exercise Block::source_info() with different block types // ============================================================================ #[test] fn test_list_with_code_block_item() { // This creates a list item ending with a code block, exercising Block::CodeBlock path - // in get_block_source_info during loose list detection - // Note: get_block_source_info is called on the LAST block of each item + // in Block::source_info() during loose list detection + // Note: Block::source_info() is called on the LAST block of each item let input = r#"- First item - Item ending with code: @@ -138,7 +138,7 @@ fn test_list_with_nested_ordered_list() { #[test] fn test_list_with_table_item() { // This creates a list item ending with a pipe table, exercising Block::Table path - // Note: get_block_source_info is called on the LAST block of each item + // Note: Block::source_info() is called on the LAST block of each item let input = r#"- First item - Item ending with table: @@ -168,7 +168,7 @@ fn test_list_with_table_item() { #[test] fn test_list_with_div_item() { // This creates a list item ending with a fenced div, exercising Block::Div path - // Note: get_block_source_info is called on the LAST block of each item + // Note: Block::source_info() is called on the LAST block of each item let input = r#"- First item - Item ending with div: @@ -198,7 +198,7 @@ fn test_list_with_div_item() { #[test] fn test_list_with_horizontal_rule_item() { // This creates a list item ending with a horizontal rule, exercising Block::HorizontalRule path - // Note: get_block_source_info is called on the LAST block of each item + // Note: Block::source_info() is called on the LAST block of each item let input = r#"- First item - Item ending with rule: diff --git a/crates/quarto-ast-reconcile/src/hash.rs b/crates/quarto-ast-reconcile/src/hash.rs index 08b1108ab..f425dc22a 100644 --- 
a/crates/quarto-ast-reconcile/src/hash.rs +++ b/crates/quarto-ast-reconcile/src/hash.rs @@ -360,8 +360,8 @@ fn compute_inline_hash_inner(inline: &Inline, cache: &mut HashCache<'_>) -> u64 Inline::NoteReference(nr) => { nr.id.hash(&mut hasher); } - Inline::Attr(attr, _attr_source) => { - hash_attr(attr, &mut hasher); + Inline::Attr(a) => { + hash_attr(&a.attr, &mut hasher); } Inline::Insert(i) => { hash_attr(&i.attr, &mut hasher); @@ -641,7 +641,7 @@ pub fn structural_eq_inline(a: &Inline, b: &Inline) -> bool { && a.keyword_args == b.keyword_args } (Inline::NoteReference(a), Inline::NoteReference(b)) => a.id == b.id, - (Inline::Attr(a, _), Inline::Attr(b, _)) => attr_eq(a, b), + (Inline::Attr(a), Inline::Attr(b)) => attr_eq(&a.attr, &b.attr), (Inline::Insert(a), Inline::Insert(b)) => { attr_eq(&a.attr, &b.attr) && structural_eq_inlines(&a.content, &b.content) } diff --git a/crates/quarto-ast-reconcile/src/remap.rs b/crates/quarto-ast-reconcile/src/remap.rs index 38f422aac..5e4ecec67 100644 --- a/crates/quarto-ast-reconcile/src/remap.rs +++ b/crates/quarto-ast-reconcile/src/remap.rs @@ -337,8 +337,9 @@ where remap_source_info(&mut i.source_info, map); } Inline::NoteReference(i) => remap_source_info(&mut i.source_info, map), - Inline::Attr(_attr, attr_source) => { - remap_attr_source(attr_source, map); + Inline::Attr(a) => { + remap_attr_source(&mut a.attr_source, map); + remap_source_info(&mut a.source_info, map); } Inline::Insert(i) => { for c in &mut i.content { diff --git a/crates/quarto-citeproc/src/output.rs b/crates/quarto-citeproc/src/output.rs index b2424a665..72e4bfbda 100644 --- a/crates/quarto-citeproc/src/output.rs +++ b/crates/quarto-citeproc/src/output.rs @@ -2169,7 +2169,7 @@ fn inline_to_markdown_string(inline: &quarto_pandoc_types::Inline, result: &mut // Quarto extensions - render content or ignore Inline::Shortcode(_) => {} Inline::NoteReference(_) => {} - Inline::Attr(_, _) => {} + Inline::Attr(_) => {} Inline::Insert(i) => { for child in 
&i.content { inline_to_markdown_string(child, result); @@ -5150,7 +5150,7 @@ fn render_inline_to_csl_html_with_ctx( // Quarto-specific types Inline::Shortcode(_) | Inline::NoteReference(_) - | Inline::Attr(_, _) + | Inline::Attr(_) | Inline::Insert(_) | Inline::Delete(_) | Inline::Highlight(_) diff --git a/crates/quarto-core/src/engine/context.rs b/crates/quarto-core/src/engine/context.rs index 9739fd8bd..7b0961c43 100644 --- a/crates/quarto-core/src/engine/context.rs +++ b/crates/quarto-core/src/engine/context.rs @@ -8,8 +8,10 @@ //! Execution context and result types for engines. use std::path::PathBuf; +use std::sync::Arc; use quarto_pandoc_types::ConfigValue; +use quarto_source_map::{SourceContext, SourceInfo}; use crate::stage::PandocIncludes; @@ -51,6 +53,24 @@ pub struct ExecutionContext { /// For example, for `engine: { jupyter: { kernel: python3 } }`, /// this would contain the `{ kernel: python3 }` map. pub engine_config: Option, + + /// Source provenance for the input text. + /// + /// Maps byte offsets in the engine's input `&str` back to original source + /// files (possibly through include expansion boundaries). + /// + /// Use `source_info.map_offset(byte_offset, &source_context)` to resolve + /// a position in the engine's input text to the original file, line, and + /// column. + pub source_info: SourceInfo, + + /// Source context for resolving `FileId`s in `source_info`. + /// + /// Contains file paths and content needed by `map_offset()` to convert + /// byte offsets to file/line/column locations. Shared via `Arc` because + /// the context is finalized after include expansion and doesn't change + /// during engine execution. 
+ pub source_context: Arc, } impl ExecutionContext { @@ -69,6 +89,8 @@ impl ExecutionContext { format: format.into(), quiet: false, engine_config: None, + source_info: SourceInfo::default(), + source_context: Arc::new(SourceContext::new()), } } @@ -89,6 +111,17 @@ impl ExecutionContext { self.engine_config = config; self } + + /// Set source provenance and context for the engine's input text. + pub fn with_source_info( + mut self, + source_info: SourceInfo, + source_context: Arc, + ) -> Self { + self.source_info = source_info; + self.source_context = source_context; + self + } } /// Result of engine execution. diff --git a/crates/quarto-core/src/pipeline.rs b/crates/quarto-core/src/pipeline.rs index b81d980eb..c1d2a5238 100644 --- a/crates/quarto-core/src/pipeline.rs +++ b/crates/quarto-core/src/pipeline.rs @@ -56,8 +56,9 @@ use crate::stage::CodeHighlightStage; use crate::stage::stages::ApplyTemplateConfig; use crate::stage::{ ApplyTemplateStage, AstTransformsStage, CompileThemeCssStage, EngineExecutionStage, - LoadedSource, MetadataMergeStage, ParseDocumentStage, Pipeline, PipelineData, PipelineStage, - PreEngineSugaringStage, RenderHtmlBodyStage, StageContext, UserFiltersStage, + IncludeExpansionStage, LoadedSource, MetadataMergeStage, ParseDocumentStage, Pipeline, + PipelineData, PipelineStage, PreEngineSugaringStage, RenderHtmlBodyStage, StageContext, + UserFiltersStage, }; use crate::transform::TransformPipeline; use crate::transforms::{ @@ -155,6 +156,7 @@ pub fn build_html_pipeline_stages_with_apply_config( let mut stages: Vec> = vec![ Box::new(ParseDocumentStage::new()), Box::new(MetadataMergeStage::new()), + Box::new(IncludeExpansionStage::new()), Box::new(PreEngineSugaringStage::new()), Box::new(EngineExecutionStage::new()), Box::new(CompileThemeCssStage::new()), @@ -227,6 +229,7 @@ pub fn build_wasm_html_pipeline() -> Pipeline { Box::new(ParseDocumentStage::new()), // No EngineExecutionStage - code cells pass through as-is 
Box::new(MetadataMergeStage::new()), + Box::new(IncludeExpansionStage::new()), Box::new(PreEngineSugaringStage::new()), Box::new(CompileThemeCssStage::new()), Box::new(UserFiltersStage::pre()), @@ -347,6 +350,7 @@ pub fn build_analysis_pipeline() -> Pipeline { let stages: Vec> = vec![ Box::new(ParseDocumentStage::new()), Box::new(MetadataMergeStage::new()), + Box::new(IncludeExpansionStage::new()), Box::new(PreEngineSugaringStage::new()), Box::new(AstTransformsStage::with_pipeline( build_analysis_transform_pipeline(), @@ -931,32 +935,34 @@ mod tests { #[test] fn test_build_html_pipeline_stages() { let stages = build_html_pipeline_stages(); - assert_eq!(stages.len(), 11); + assert_eq!(stages.len(), 12); assert_eq!(stages[0].name(), "parse-document"); assert_eq!(stages[1].name(), "metadata-merge"); - assert_eq!(stages[2].name(), "pre-engine-sugaring"); - assert_eq!(stages[3].name(), "engine-execution"); - assert_eq!(stages[4].name(), "compile-theme-css"); - assert_eq!(stages[5].name(), "user-filters-pre"); - assert_eq!(stages[6].name(), "ast-transforms"); - assert_eq!(stages[7].name(), "user-filters-post"); - assert_eq!(stages[8].name(), "code-highlight"); - assert_eq!(stages[9].name(), "render-html-body"); - assert_eq!(stages[10].name(), "apply-template"); + assert_eq!(stages[2].name(), "include-expansion"); + assert_eq!(stages[3].name(), "pre-engine-sugaring"); + assert_eq!(stages[4].name(), "engine-execution"); + assert_eq!(stages[5].name(), "compile-theme-css"); + assert_eq!(stages[6].name(), "user-filters-pre"); + assert_eq!(stages[7].name(), "ast-transforms"); + assert_eq!(stages[8].name(), "user-filters-post"); + assert_eq!(stages[9].name(), "code-highlight"); + assert_eq!(stages[10].name(), "render-html-body"); + assert_eq!(stages[11].name(), "apply-template"); } #[test] fn test_build_html_pipeline() { let pipeline = build_html_pipeline(); - assert_eq!(pipeline.len(), 11); + assert_eq!(pipeline.len(), 12); } #[test] fn test_build_wasm_html_pipeline() { let 
pipeline = build_wasm_html_pipeline(); - // WASM pipeline has 10 stages (no engine execution, but otherwise - // the same as the native HTML pipeline). - assert_eq!(pipeline.len(), 10); + // WASM pipeline has 11 stages (no engine execution, but otherwise + // the same as the native HTML pipeline: includes include-expansion + // and code-highlight). + assert_eq!(pipeline.len(), 11); } #[test] @@ -964,8 +970,8 @@ mod tests { use crate::stage::PipelineDataKind; let pipeline = build_analysis_pipeline(); - // Parse + MetadataMerge + PreEngineSugaring + AstTransforms(analysis subset) - assert_eq!(pipeline.len(), 4); + // Parse + MetadataMerge + IncludeExpansion + PreEngineSugaring + AstTransforms(analysis subset) + assert_eq!(pipeline.len(), 5); assert_eq!(pipeline.expected_input(), PipelineDataKind::LoadedSource); assert_eq!(pipeline.expected_output(), PipelineDataKind::DocumentAst); } diff --git a/crates/quarto-core/src/stage/mod.rs b/crates/quarto-core/src/stage/mod.rs index e62c0a4e8..8ebfc8c63 100644 --- a/crates/quarto-core/src/stage/mod.rs +++ b/crates/quarto-core/src/stage/mod.rs @@ -109,8 +109,8 @@ pub use traits::PipelineStage; pub use stages::CodeHighlightStage; pub use stages::{ ApplyTemplateStage, AstTransformsStage, CompileThemeCssStage, EngineExecutionStage, - MetadataMergeStage, ParseDocumentStage, PreEngineSugaringStage, RenderHtmlBodyStage, - UserFiltersStage, + IncludeExpansionStage, MetadataMergeStage, ParseDocumentStage, PreEngineSugaringStage, + RenderHtmlBodyStage, UserFiltersStage, }; // Re-export the trace_event macro diff --git a/crates/quarto-core/src/stage/stages/engine_execution.rs b/crates/quarto-core/src/stage/stages/engine_execution.rs index 52bef435b..fb7a2d96a 100644 --- a/crates/quarto-core/src/stage/stages/engine_execution.rs +++ b/crates/quarto-core/src/stage/stages/engine_execution.rs @@ -189,7 +189,7 @@ impl PipelineStage for EngineExecutionStage { } // Step 4: Serialize AST to QMD for engine execution - let qmd = 
serialize_ast_to_qmd(&doc_ast.ast)?; + let (qmd, qmd_source_info) = serialize_ast_to_qmd(&doc_ast.ast)?; trace_event!( ctx, @@ -199,6 +199,8 @@ impl PipelineStage for EngineExecutionStage { ); // Step 5: Prepare execution context + // Clone source_context into Arc — it's finalized after include expansion. + let source_context = std::sync::Arc::new(doc_ast.source_context.clone()); let exec_context = ExecutionContext::new( ctx.temp_dir.clone(), ctx.project.dir.clone(), @@ -210,7 +212,8 @@ impl PipelineStage for EngineExecutionStage { } else { Some(ctx.project.dir.clone()) }) - .with_engine_config(detected.config.clone()); + .with_engine_config(detected.config.clone()) + .with_source_info(qmd_source_info, source_context); // Step 6: Execute the engine trace_event!(ctx, EventLevel::Info, "executing engine: {}", engine.name()); @@ -357,18 +360,20 @@ fn intermediate_filename(source_path: &std::path::Path) -> String { /// Uses pampa's QMD writer which preserves code cell attributes. fn serialize_ast_to_qmd( ast: &quarto_pandoc_types::pandoc::Pandoc, -) -> Result { - let mut buffer = Vec::new(); - pampa::writers::qmd::write(ast, &mut buffer).map_err(|diagnostics| { - PipelineError::stage_error_with_diagnostics("engine-execution", diagnostics) - })?; +) -> Result<(String, quarto_source_map::SourceInfo), PipelineError> { + let (buffer, source_info) = + pampa::writers::qmd::write_with_source_info(ast).map_err(|diagnostics| { + PipelineError::stage_error_with_diagnostics("engine-execution", diagnostics) + })?; - String::from_utf8(buffer).map_err(|e| { + let text = String::from_utf8(buffer).map_err(|e| { PipelineError::stage_error( "engine-execution", format!("QMD serialization produced invalid UTF-8: {}", e), ) - }) + })?; + + Ok((text, source_info)) } #[cfg(test)] @@ -665,12 +670,17 @@ mod tests { let content = b"---\ntitle: Test\n---\n\n# Hello\n\nWorld"; let doc_ast = parse_qmd_to_ast(content, "test.qmd"); - let qmd = serialize_ast_to_qmd(&doc_ast.ast).unwrap(); + let (qmd, 
source_info) = serialize_ast_to_qmd(&doc_ast.ast).unwrap(); // Should contain the title assert!(qmd.contains("title")); // Should contain the heading assert!(qmd.contains("Hello")); + // Should have source provenance + assert!( + matches!(source_info, quarto_source_map::SourceInfo::Concat { .. }), + "Expected Concat SourceInfo from serialization" + ); // Should contain the paragraph assert!(qmd.contains("World")); } @@ -925,4 +935,109 @@ mod tests { assert_eq!(ctx.includes.include_after.len(), 1); assert_eq!(ctx.includes.include_after[0], "
after
"); } + + // === Phase 0C: SourceInfo in ExecutionContext tests === + + #[test] + fn test_execution_context_has_source_info() { + let ctx = ExecutionContext::new( + PathBuf::from("/tmp"), + PathBuf::from("/project"), + PathBuf::from("/project/doc.qmd"), + "html", + ); + // Default source_info should be SourceInfo::default() + assert_eq!(ctx.source_info, quarto_source_map::SourceInfo::default()); + } + + #[test] + fn test_execution_context_with_source_info() { + let si = quarto_source_map::SourceInfo::original(quarto_source_map::FileId(0), 0, 100); + let mut sc = quarto_source_map::SourceContext::new(); + sc.add_file("test.qmd".to_string(), Some("content".to_string())); + let sc = std::sync::Arc::new(sc); + + let ctx = ExecutionContext::new( + PathBuf::from("/tmp"), + PathBuf::from("/project"), + PathBuf::from("/project/doc.qmd"), + "html", + ) + .with_source_info(si.clone(), sc.clone()); + + assert_eq!(ctx.source_info, si); + assert!( + ctx.source_context + .get_file(quarto_source_map::FileId(0)) + .is_some() + ); + } + + #[test] + fn test_serialize_ast_to_qmd_produces_source_info() { + let content = b"# Title\n\nSome body text\n\n```python\nprint('hello')\n```\n"; + let doc_ast = parse_qmd_to_ast(content, "test.qmd"); + + let (qmd, source_info) = serialize_ast_to_qmd(&doc_ast.ast).unwrap(); + + // SourceInfo should be Concat + let pieces = match &source_info { + quarto_source_map::SourceInfo::Concat { pieces } => pieces, + other => panic!("Expected Concat, got {:?}", other), + }; + + // Piece lengths should sum to qmd length + let total: usize = pieces.iter().map(|p| p.length).sum(); + assert_eq!(total, qmd.len()); + + // Should have pieces for the blocks + assert!( + pieces.len() >= 2, + "Expected at least 2 pieces (heading + body)" + ); + } + + #[test] + fn test_source_info_map_offset_single_file() { + let input = b"# Title\n\nBody text here\n"; + let doc_ast = parse_qmd_to_ast(input, "test.qmd"); + + let (qmd, source_info) = 
serialize_ast_to_qmd(&doc_ast.ast).unwrap(); + + // Build a SourceContext from the doc_ast's ast_context + let source_context = &doc_ast.ast_context.source_context; + + // Find "Body" in the serialized output + let body_pos = qmd.find("Body").expect("should find Body in output"); + let mapped = source_info.map_offset(body_pos, source_context); + assert!( + mapped.is_some(), + "map_offset should resolve for a body text offset" + ); + let mapped = mapped.unwrap(); + assert_eq!( + mapped.file_id, + quarto_source_map::FileId(0), + "Should map to the main file" + ); + } + + #[test] + fn test_source_info_map_offset_start_and_end() { + let input = b"# Title\n\nBody"; + let doc_ast = parse_qmd_to_ast(input, "test.qmd"); + + let (qmd, source_info) = serialize_ast_to_qmd(&doc_ast.ast).unwrap(); + let source_context = &doc_ast.ast_context.source_context; + + // Start of output + let mapped_start = source_info.map_offset(0, source_context); + assert!(mapped_start.is_some(), "Start of output should resolve"); + + // End of output (last valid byte) + if !qmd.is_empty() { + let mapped_end = source_info.map_offset(qmd.len() - 1, source_context); + assert!(mapped_end.is_some(), "End of output should resolve"); + } + } } diff --git a/crates/quarto-core/src/stage/stages/include_expansion.rs b/crates/quarto-core/src/stage/stages/include_expansion.rs new file mode 100644 index 000000000..c7d1d8768 --- /dev/null +++ b/crates/quarto-core/src/stage/stages/include_expansion.rs @@ -0,0 +1,867 @@ +/* + * stage/stages/include_expansion.rs + * Copyright (c) 2025 Posit, PBC + * + * Pipeline stage that expands `{{< include file.qmd >}}` shortcodes + * in the AST before engine execution. + */ + +//! Include shortcode expansion stage. +//! +//! Resolves block-level `{{< include file.qmd >}}` shortcodes by parsing +//! the included file and splicing its AST blocks into the main document. +//! Runs before engine execution so that included code cells are visible +//! to the engine. 
+ +use std::collections::HashSet; +use std::path::{Path, PathBuf}; + +use async_trait::async_trait; + +use quarto_pandoc_types::shortcode::ShortcodeArg; +use quarto_pandoc_types::{Block, Inline}; + +use crate::stage::data::DocumentAst; +use crate::stage::{PipelineData, PipelineDataKind, PipelineError, PipelineStage, StageContext}; + +pub struct IncludeExpansionStage; + +impl IncludeExpansionStage { + pub fn new() -> Self { + Self + } +} + +impl Default for IncludeExpansionStage { + fn default() -> Self { + Self::new() + } +} + +#[async_trait(?Send)] +impl PipelineStage for IncludeExpansionStage { + fn name(&self) -> &str { + "include-expansion" + } + + fn input_kind(&self) -> PipelineDataKind { + PipelineDataKind::DocumentAst + } + + fn output_kind(&self) -> PipelineDataKind { + PipelineDataKind::DocumentAst + } + + async fn run( + &self, + input: PipelineData, + ctx: &mut StageContext, + ) -> Result { + let PipelineData::DocumentAst(mut doc) = input else { + return Err(PipelineError::unexpected_input( + self.name(), + self.input_kind(), + input.kind(), + )); + }; + + let mut include_stack = HashSet::new(); + let doc_path = doc.path.clone(); + include_stack.insert(doc_path.clone()); + + expand_includes_in_blocks(&mut doc, ctx, &doc_path, &mut include_stack)?; + + Ok(PipelineData::DocumentAst(doc)) + } +} + +/// Expand include shortcodes in a block list, recursively. 
+fn expand_includes_in_blocks( + doc: &mut DocumentAst, + ctx: &mut StageContext, + current_file: &Path, + include_stack: &mut HashSet, +) -> Result<(), PipelineError> { + let mut i = 0; + while i < doc.ast.blocks.len() { + if let Some(include_path) = extract_include_path(&doc.ast.blocks[i]) { + // Resolve relative to the including file's directory + let base_dir = current_file.parent().unwrap_or(Path::new(".")); + let resolved = base_dir.join(&include_path); + + // Canonicalize for cycle detection + let canonical = resolved.canonicalize().unwrap_or_else(|_| resolved.clone()); + + // Check for circular includes + if include_stack.contains(&canonical) { + ctx.diagnostics.push( + quarto_error_reporting::DiagnosticMessageBuilder::warning("Circular include") + .with_code("Q-5-1") + .with_location(doc.ast.blocks[i].source_info().clone()) + .problem(format!( + "Circular include detected: '{}' is already being included", + resolved.display() + )) + .add_hint("Check for files that include each other, directly or indirectly") + .build(), + ); + i += 1; + continue; + } + + // Read the included file + let content = match ctx.runtime.file_read(&resolved) { + Ok(bytes) => bytes, + Err(e) => { + ctx.diagnostics.push( + quarto_error_reporting::DiagnosticMessageBuilder::warning( + "Include file not found", + ) + .with_code("Q-5-2") + .with_location(doc.ast.blocks[i].source_info().clone()) + .problem(format!( + "Could not read included file '{}': {}", + resolved.display(), + e + )) + .build(), + ); + i += 1; + continue; + } + }; + + // Parse the included file + let mut stderr_buf = Vec::new(); + let filename = resolved.to_string_lossy().to_string(); + let parse_result = + pampa::readers::qmd::read(&content, false, &filename, &mut stderr_buf, true, None); + + let (included_pandoc, included_ast_context, _warnings) = match parse_result { + Ok(result) => result, + Err(diagnostics) => { + ctx.diagnostics.push( + quarto_error_reporting::DiagnosticMessageBuilder::error( + "Include file 
parse error", + ) + .with_code("Q-5-3") + .with_location(doc.ast.blocks[i].source_info().clone()) + .problem(format!( + "Failed to parse included file '{}': {} error(s)", + resolved.display(), + diagnostics.len() + )) + .build(), + ); + i += 1; + continue; + } + }; + + // Register included file in BOTH SourceContexts with the same FileId + let content_str = String::from_utf8_lossy(&content).into_owned(); + + // Register in ast_context.source_context (for map_offset resolution) + let new_file_id = if let Some(file_info) = included_ast_context + .source_context + .get_file(quarto_source_map::FileId(0)) + .and_then(|f| f.file_info.clone()) + { + doc.ast_context + .source_context + .add_file_with_info(filename.clone(), file_info) + } else { + doc.ast_context + .source_context + .add_file(filename.clone(), Some(content_str.clone())) + }; + + // Register in top-level source_context (for ariadne error snippets) + // Use add_file which returns a new FileId, but we need the same one. + // Since both contexts grow sequentially, they should stay in sync if + // we register in the same order. However, to be safe we verify. 
+ let snippet_file_id = doc + .source_context + .add_file(filename.clone(), Some(content_str)); + debug_assert_eq!( + new_file_id, snippet_file_id, + "FileId mismatch between ast_context.source_context and source_context" + ); + + // Merge filenames + for name in &included_ast_context.filenames { + if !doc.ast_context.filenames.contains(name) { + doc.ast_context.filenames.push(name.clone()); + } + } + + // Remap FileIds in the parsed AST: FileId(0) → new_file_id + let mut included_blocks = included_pandoc.blocks; + // Remap each block's source info and all nested source info + let mut temp_pandoc = quarto_pandoc_types::pandoc::Pandoc { + meta: quarto_pandoc_types::config_value::ConfigValue::default(), + blocks: included_blocks, + }; + quarto_ast_reconcile::remap_file_ids(&mut temp_pandoc, &|id| { + if id == quarto_source_map::FileId(0) { + new_file_id + } else { + id + } + }); + included_blocks = temp_pandoc.blocks; + + // Replace the paragraph containing the shortcode with included blocks + doc.ast.blocks.remove(i); + let num_inserted = included_blocks.len(); + for (j, block) in included_blocks.into_iter().enumerate() { + doc.ast.blocks.insert(i + j, block); + } + + // Recursively expand includes in the newly inserted blocks + include_stack.insert(canonical.clone()); + + // Process the inserted blocks for nested includes + let mut sub_doc = DocumentAst { + path: resolved.clone(), + ast: quarto_pandoc_types::pandoc::Pandoc { + meta: quarto_pandoc_types::config_value::ConfigValue::default(), + blocks: doc.ast.blocks.split_off(i), + }, + ast_context: doc.ast_context.clone(), + source_context: doc.source_context.clone(), + warnings: vec![], + }; + // Only process the newly inserted blocks + let remaining = sub_doc.ast.blocks.split_off(num_inserted); + expand_includes_in_blocks(&mut sub_doc, ctx, &resolved, include_stack)?; + + // Merge back: expanded blocks + remaining + let mut all_blocks = doc.ast.blocks.clone(); // blocks before i + 
all_blocks.extend(sub_doc.ast.blocks); + all_blocks.extend(remaining); + doc.ast.blocks = all_blocks; + + // Merge back any context changes from recursion + doc.ast_context = sub_doc.ast_context; + doc.source_context = sub_doc.source_context; + + include_stack.remove(&canonical); + + // Don't increment i — the new blocks at position i may themselves + // have already been expanded, but blocks after the inserted range + // still need processing. Advance past the inserted blocks. + i += num_inserted; + } else { + i += 1; + } + } + Ok(()) +} + +/// Check if a block is a paragraph containing only an include shortcode. +/// Returns the include path if so, None otherwise. +fn extract_include_path(block: &Block) -> Option { + let Block::Paragraph(para) = block else { + return None; + }; + + // Must contain exactly one inline, and it must be a shortcode + if para.content.len() != 1 { + return None; + } + + let Inline::Shortcode(shortcode) = ¶.content[0] else { + return None; + }; + + if shortcode.name != "include" { + return None; + } + + // Extract file path from first positional argument + shortcode.positional_args.first().and_then(|arg| match arg { + ShortcodeArg::String(s) => Some(s.clone()), + _ => None, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use quarto_pandoc_types::block::Paragraph; + use quarto_pandoc_types::inline::Str; + use quarto_pandoc_types::shortcode::Shortcode; + use quarto_source_map::{FileId, SourceInfo}; + use std::collections::HashMap; + + fn make_include_paragraph(path: &str) -> Block { + Block::Paragraph(Paragraph { + content: vec![Inline::Shortcode(Shortcode { + is_escaped: false, + name: "include".to_string(), + positional_args: vec![ShortcodeArg::String(path.to_string())], + keyword_args: HashMap::new(), + source_info: SourceInfo::original(FileId(0), 0, 30), + })], + source_info: SourceInfo::original(FileId(0), 0, 30), + }) + } + + #[test] + fn extract_include_path_from_paragraph() { + let block = 
make_include_paragraph("other.qmd"); + assert_eq!(extract_include_path(&block), Some("other.qmd".to_string())); + } + + #[test] + fn extract_include_path_non_include_shortcode() { + let block = Block::Paragraph(Paragraph { + content: vec![Inline::Shortcode(Shortcode { + is_escaped: false, + name: "meta".to_string(), + positional_args: vec![ShortcodeArg::String("title".to_string())], + keyword_args: HashMap::new(), + source_info: SourceInfo::default(), + })], + source_info: SourceInfo::default(), + }); + assert_eq!(extract_include_path(&block), None); + } + + #[test] + fn extract_include_path_inline_include_not_detected() { + // Paragraph with text + include shortcode → NOT an include + let block = Block::Paragraph(Paragraph { + content: vec![ + Inline::Str(Str { + text: "some text ".to_string(), + source_info: SourceInfo::default(), + }), + Inline::Shortcode(Shortcode { + is_escaped: false, + name: "include".to_string(), + positional_args: vec![ShortcodeArg::String("file.qmd".to_string())], + keyword_args: HashMap::new(), + source_info: SourceInfo::default(), + }), + ], + source_info: SourceInfo::default(), + }); + assert_eq!(extract_include_path(&block), None); + } + + #[test] + fn extract_include_path_from_non_paragraph() { + // Code blocks are never includes + let block = Block::CodeBlock(quarto_pandoc_types::block::CodeBlock { + attr: quarto_pandoc_types::attr::empty_attr(), + text: "{{< include file.qmd >}}".to_string(), + source_info: SourceInfo::default(), + attr_source: quarto_pandoc_types::attr::AttrSourceInfo::empty(), + }); + assert_eq!(extract_include_path(&block), None); + } + + #[test] + fn extract_include_path_empty_paragraph() { + let block = Block::Paragraph(Paragraph { + content: vec![], + source_info: SourceInfo::default(), + }); + assert_eq!(extract_include_path(&block), None); + } + + // === Integration tests using expand_includes_in_blocks === + + use std::path::PathBuf; + use std::sync::Arc; + + /// Mock runtime that serves file content from 
an in-memory map. + struct MockFileRuntime { + files: HashMap>, + } + + impl MockFileRuntime { + fn new(files: Vec<(&str, &str)>) -> Self { + Self { + files: files + .into_iter() + .map(|(p, c)| (PathBuf::from(p), c.as_bytes().to_vec())) + .collect(), + } + } + } + + // Delegate all SystemRuntime methods to defaults except file_read/path_exists/canonicalize + macro_rules! mock_runtime_stubs { + () => { + fn file_write( + &self, + _path: &std::path::Path, + _contents: &[u8], + ) -> quarto_system_runtime::RuntimeResult<()> { + Ok(()) + } + fn file_copy( + &self, + _src: &std::path::Path, + _dst: &std::path::Path, + ) -> quarto_system_runtime::RuntimeResult<()> { + Ok(()) + } + fn path_rename( + &self, + _old: &std::path::Path, + _new: &std::path::Path, + ) -> quarto_system_runtime::RuntimeResult<()> { + Ok(()) + } + fn file_remove( + &self, + _path: &std::path::Path, + ) -> quarto_system_runtime::RuntimeResult<()> { + Ok(()) + } + fn path_metadata( + &self, + _path: &std::path::Path, + ) -> quarto_system_runtime::RuntimeResult { + unimplemented!() + } + fn dir_create( + &self, + _path: &std::path::Path, + _recursive: bool, + ) -> quarto_system_runtime::RuntimeResult<()> { + Ok(()) + } + fn dir_remove( + &self, + _path: &std::path::Path, + _recursive: bool, + ) -> quarto_system_runtime::RuntimeResult<()> { + Ok(()) + } + fn dir_list( + &self, + _path: &std::path::Path, + ) -> quarto_system_runtime::RuntimeResult> { + Ok(vec![]) + } + fn cwd(&self) -> quarto_system_runtime::RuntimeResult { + Ok(PathBuf::from("/")) + } + fn temp_dir( + &self, + _template: &str, + ) -> quarto_system_runtime::RuntimeResult { + Ok(quarto_system_runtime::TempDir::new(PathBuf::from( + "/tmp/test", + ))) + } + fn exec_pipe( + &self, + _command: &str, + _args: &[&str], + _stdin: &[u8], + ) -> quarto_system_runtime::RuntimeResult> { + Ok(vec![]) + } + fn exec_command( + &self, + _command: &str, + _args: &[&str], + _stdin: Option<&[u8]>, + ) -> quarto_system_runtime::RuntimeResult { + 
Ok(quarto_system_runtime::CommandOutput { + code: 0, + stdout: vec![], + stderr: vec![], + }) + } + fn env_get(&self, _name: &str) -> quarto_system_runtime::RuntimeResult> { + Ok(None) + } + fn env_all( + &self, + ) -> quarto_system_runtime::RuntimeResult> { + Ok(std::collections::HashMap::new()) + } + fn os_name(&self) -> &'static str { + "mock" + } + fn arch(&self) -> &'static str { + "mock" + } + fn cpu_time(&self) -> quarto_system_runtime::RuntimeResult { + Ok(0) + } + fn xdg_dir( + &self, + _kind: quarto_system_runtime::XdgDirKind, + _subpath: Option<&std::path::Path>, + ) -> quarto_system_runtime::RuntimeResult { + Ok(PathBuf::from("/xdg")) + } + fn stdout_write(&self, _data: &[u8]) -> quarto_system_runtime::RuntimeResult<()> { + Ok(()) + } + fn stderr_write(&self, _data: &[u8]) -> quarto_system_runtime::RuntimeResult<()> { + Ok(()) + } + }; + } + + #[async_trait::async_trait] + impl quarto_system_runtime::SystemRuntime for MockFileRuntime { + fn file_read( + &self, + path: &std::path::Path, + ) -> quarto_system_runtime::RuntimeResult> { + self.files.get(path).cloned().ok_or_else(|| { + quarto_system_runtime::RuntimeError::Io(std::io::Error::new( + std::io::ErrorKind::NotFound, + format!("mock: file not found: {}", path.display()), + )) + }) + } + fn path_exists( + &self, + path: &std::path::Path, + _kind: Option, + ) -> quarto_system_runtime::RuntimeResult { + Ok(self.files.contains_key(path)) + } + fn canonicalize( + &self, + path: &std::path::Path, + ) -> quarto_system_runtime::RuntimeResult { + Ok(path.to_path_buf()) + } + async fn fetch_url( + &self, + _url: &str, + ) -> quarto_system_runtime::RuntimeResult<(Vec, String)> { + Err(quarto_system_runtime::RuntimeError::NotSupported( + "mock".to_string(), + )) + } + mock_runtime_stubs!(); + } + + fn make_stage_context(runtime: Arc) -> StageContext { + use crate::format::Format; + use crate::project::{DocumentInfo, ProjectContext}; + + let project = ProjectContext { + dir: PathBuf::from("/project"), + config: 
crate::project::ProjectConfig::default(), + is_single_file: true, + files: vec![], + output_dir: PathBuf::from("/project"), + }; + let doc = DocumentInfo::from_path("/project/doc.qmd"); + let format = Format::html(); + + StageContext::new(runtime, format, project, doc).unwrap() + } + + fn parse_to_doc_ast(content: &str, path: &str) -> DocumentAst { + let mut stderr = Vec::new(); + let (pandoc, ast_context, _warnings) = + pampa::readers::qmd::read(content.as_bytes(), false, path, &mut stderr, true, None) + .expect("parse failed"); + + // Register the main file in source_context (mirrors ParseDocumentStage) + let mut source_context = quarto_source_map::SourceContext::new(); + source_context.add_file(path.to_string(), Some(content.to_string())); + + DocumentAst { + path: PathBuf::from(path), + ast: pandoc, + ast_context, + source_context, + warnings: vec![], + } + } + + #[test] + fn simple_include_replaces_paragraph() { + let runtime = Arc::new(MockFileRuntime::new(vec![( + "/project/included.qmd", + "Included content", + )])); + let mut ctx = make_stage_context(runtime); + + let mut doc = parse_to_doc_ast( + "Before\n\n{{< include included.qmd >}}\n\nAfter", + "/project/doc.qmd", + ); + + let mut include_stack = HashSet::new(); + include_stack.insert(PathBuf::from("/project/doc.qmd")); + + expand_includes_in_blocks( + &mut doc, + &mut ctx, + &PathBuf::from("/project/doc.qmd"), + &mut include_stack, + ) + .unwrap(); + + assert!(ctx.diagnostics.is_empty(), "No errors expected"); + + // Should have 3 blocks: Before paragraph, Included content paragraph, After paragraph + assert_eq!( + doc.ast.blocks.len(), + 3, + "Expected 3 blocks after include expansion, got {}", + doc.ast.blocks.len() + ); + } + + #[test] + fn missing_file_produces_diagnostic() { + let runtime = Arc::new(MockFileRuntime::new(vec![])); + let mut ctx = make_stage_context(runtime); + + let mut doc = parse_to_doc_ast("{{< include nonexistent.qmd >}}", "/project/doc.qmd"); + + let mut include_stack = 
HashSet::new(); + include_stack.insert(PathBuf::from("/project/doc.qmd")); + + expand_includes_in_blocks( + &mut doc, + &mut ctx, + &PathBuf::from("/project/doc.qmd"), + &mut include_stack, + ) + .unwrap(); + + assert_eq!(ctx.diagnostics.len(), 1); + assert!(ctx.diagnostics[0].title.contains("not found")); + } + + #[test] + fn circular_include_produces_diagnostic() { + // doc.qmd includes circular.qmd, which includes doc.qmd + let runtime = Arc::new(MockFileRuntime::new(vec![( + "/project/circular.qmd", + "{{< include doc.qmd >}}", + )])); + let mut ctx = make_stage_context(runtime); + + let mut doc = parse_to_doc_ast("{{< include circular.qmd >}}", "/project/doc.qmd"); + + let mut include_stack = HashSet::new(); + include_stack.insert(PathBuf::from("/project/doc.qmd")); + + expand_includes_in_blocks( + &mut doc, + &mut ctx, + &PathBuf::from("/project/doc.qmd"), + &mut include_stack, + ) + .unwrap(); + + // Should have a circular include diagnostic + assert!( + ctx.diagnostics.iter().any(|d| d.title.contains("Circular")), + "Expected circular include diagnostic, got: {:?}", + ctx.diagnostics + ); + } + + #[test] + fn recursive_include_works() { + // doc includes a.qmd, a.qmd includes b.qmd + let runtime = Arc::new(MockFileRuntime::new(vec![ + ("/project/a.qmd", "From A\n\n{{< include b.qmd >}}"), + ("/project/b.qmd", "From B"), + ])); + let mut ctx = make_stage_context(runtime); + + let mut doc = parse_to_doc_ast( + "Before\n\n{{< include a.qmd >}}\n\nAfter", + "/project/doc.qmd", + ); + + let mut include_stack = HashSet::new(); + include_stack.insert(PathBuf::from("/project/doc.qmd")); + + expand_includes_in_blocks( + &mut doc, + &mut ctx, + &PathBuf::from("/project/doc.qmd"), + &mut include_stack, + ) + .unwrap(); + + assert!( + ctx.diagnostics.is_empty(), + "No errors expected: {:?}", + ctx.diagnostics + ); + + // Should have 4 blocks: Before, From A, From B, After + assert_eq!( + doc.ast.blocks.len(), + 4, + "Expected 4 blocks after recursive include, got {}", 
+ doc.ast.blocks.len() + ); + } + + #[test] + fn included_file_frontmatter_stripped() { + let runtime = Arc::new(MockFileRuntime::new(vec![( + "/project/with_yaml.qmd", + "---\ntitle: Included\n---\n\nIncluded body", + )])); + let mut ctx = make_stage_context(runtime); + + let mut doc = parse_to_doc_ast("{{< include with_yaml.qmd >}}", "/project/doc.qmd"); + + let mut include_stack = HashSet::new(); + include_stack.insert(PathBuf::from("/project/doc.qmd")); + + expand_includes_in_blocks( + &mut doc, + &mut ctx, + &PathBuf::from("/project/doc.qmd"), + &mut include_stack, + ) + .unwrap(); + + assert!(ctx.diagnostics.is_empty(), "No errors expected"); + + // Should have just the body paragraph, not the YAML metadata + assert_eq!(doc.ast.blocks.len(), 1); + // Verify it's the body content, not metadata + if let Block::Paragraph(p) = &doc.ast.blocks[0] { + let text: String = p + .content + .iter() + .filter_map(|i| { + if let Inline::Str(s) = i { + Some(s.text.as_str()) + } else { + None + } + }) + .collect(); + assert!( + text.contains("Included"), + "Expected 'Included' body text, got: {}", + text + ); + } + } + + #[test] + fn included_blocks_have_correct_file_id() { + let runtime = Arc::new(MockFileRuntime::new(vec![( + "/project/other.qmd", + "Other content", + )])); + let mut ctx = make_stage_context(runtime); + + let mut doc = parse_to_doc_ast("Main\n\n{{< include other.qmd >}}", "/project/doc.qmd"); + + let mut include_stack = HashSet::new(); + include_stack.insert(PathBuf::from("/project/doc.qmd")); + + expand_includes_in_blocks( + &mut doc, + &mut ctx, + &PathBuf::from("/project/doc.qmd"), + &mut include_stack, + ) + .unwrap(); + + assert!(ctx.diagnostics.is_empty()); + assert_eq!(doc.ast.blocks.len(), 2); + + // First block (Main) should have FileId(0) + let main_si = doc.ast.blocks[0].source_info(); + if let SourceInfo::Original { file_id, .. 
} = main_si { + assert_eq!(*file_id, FileId(0), "Main block should be FileId(0)"); + } + + // Second block (Other content) should have a different FileId (the included file) + let included_si = doc.ast.blocks[1].source_info(); + if let SourceInfo::Original { file_id, .. } = included_si { + assert_ne!( + *file_id, + FileId(0), + "Included block should NOT be FileId(0)" + ); + } + } + + #[test] + fn inline_include_not_expanded() { + // Include shortcode among other inlines should NOT be expanded + let runtime = Arc::new(MockFileRuntime::new(vec![( + "/project/other.qmd", + "Included", + )])); + let mut ctx = make_stage_context(runtime); + + let mut doc = parse_to_doc_ast("text {{< include other.qmd >}} more", "/project/doc.qmd"); + let original_block_count = doc.ast.blocks.len(); + + let mut include_stack = HashSet::new(); + include_stack.insert(PathBuf::from("/project/doc.qmd")); + + expand_includes_in_blocks( + &mut doc, + &mut ctx, + &PathBuf::from("/project/doc.qmd"), + &mut include_stack, + ) + .unwrap(); + + // Block count should be unchanged — inline include not expanded + assert_eq!(doc.ast.blocks.len(), original_block_count); + } + + #[test] + fn include_with_code_cell() { + let runtime = Arc::new(MockFileRuntime::new(vec![( + "/project/code.qmd", + "```python\nprint('hello')\n```", + )])); + let mut ctx = make_stage_context(runtime); + + let mut doc = parse_to_doc_ast( + "Before\n\n{{< include code.qmd >}}\n\nAfter", + "/project/doc.qmd", + ); + + let mut include_stack = HashSet::new(); + include_stack.insert(PathBuf::from("/project/doc.qmd")); + + expand_includes_in_blocks( + &mut doc, + &mut ctx, + &PathBuf::from("/project/doc.qmd"), + &mut include_stack, + ) + .unwrap(); + + assert!(ctx.diagnostics.is_empty()); + + // Should have a CodeBlock from the included file + let has_code_block = doc + .ast + .blocks + .iter() + .any(|b| matches!(b, Block::CodeBlock(_))); + assert!( + has_code_block, + "Expected a CodeBlock from included file in the AST" + ); + } 
+} diff --git a/crates/quarto-core/src/stage/stages/mod.rs b/crates/quarto-core/src/stage/stages/mod.rs index bd0288095..37906b56f 100644 --- a/crates/quarto-core/src/stage/stages/mod.rs +++ b/crates/quarto-core/src/stage/stages/mod.rs @@ -23,6 +23,7 @@ mod ast_transforms; mod code_highlight; mod compile_theme_css; mod engine_execution; +mod include_expansion; mod metadata_merge; mod parse_document; mod pre_engine_sugaring; @@ -34,6 +35,7 @@ pub use ast_transforms::AstTransformsStage; pub use code_highlight::CodeHighlightStage; pub use compile_theme_css::CompileThemeCssStage; pub use engine_execution::EngineExecutionStage; +pub use include_expansion::IncludeExpansionStage; pub use metadata_merge::MetadataMergeStage; pub use parse_document::ParseDocumentStage; pub use pre_engine_sugaring::PreEngineSugaringStage; diff --git a/crates/quarto-core/src/transforms/metadata_normalize.rs b/crates/quarto-core/src/transforms/metadata_normalize.rs index 753c4b169..a44d5279b 100644 --- a/crates/quarto-core/src/transforms/metadata_normalize.rs +++ b/crates/quarto-core/src/transforms/metadata_normalize.rs @@ -163,7 +163,7 @@ fn inlines_to_plain_text(inlines: &[Inline]) -> String { } } // Skip these - they don't contribute meaningful text - Inline::Shortcode(_) | Inline::NoteReference(_) | Inline::Attr(_, _) => {} + Inline::Shortcode(_) | Inline::NoteReference(_) | Inline::Attr(_) => {} } } result diff --git a/crates/quarto-core/src/transforms/resource_collector.rs b/crates/quarto-core/src/transforms/resource_collector.rs index 0eec4ba1c..bd746ce79 100644 --- a/crates/quarto-core/src/transforms/resource_collector.rs +++ b/crates/quarto-core/src/transforms/resource_collector.rs @@ -342,7 +342,7 @@ impl<'a> ResourceVisitor<'a> { | Inline::RawInline(_) | Inline::Shortcode(_) | Inline::NoteReference(_) - | Inline::Attr(_, _) => {} + | Inline::Attr(_) => {} } } diff --git a/crates/quarto-core/src/transforms/shortcode_resolve.rs b/crates/quarto-core/src/transforms/shortcode_resolve.rs 
index bba096cea..506017969 100644 --- a/crates/quarto-core/src/transforms/shortcode_resolve.rs +++ b/crates/quarto-core/src/transforms/shortcode_resolve.rs @@ -1021,7 +1021,7 @@ fn recurse_inline<'a>( | Inline::RawInline(_) | Inline::Shortcode(_) | Inline::NoteReference(_) - | Inline::Attr(_, _) => {} + | Inline::Attr(_) => {} } }) } diff --git a/crates/quarto-pandoc-types/src/block.rs b/crates/quarto-pandoc-types/src/block.rs index 766aef649..7fde1317d 100644 --- a/crates/quarto-pandoc-types/src/block.rs +++ b/crates/quarto-pandoc-types/src/block.rs @@ -40,6 +40,32 @@ pub enum Block { Custom(CustomNode), } +impl Block { + pub fn source_info(&self) -> &quarto_source_map::SourceInfo { + match self { + Block::Plain(b) => &b.source_info, + Block::Paragraph(b) => &b.source_info, + Block::LineBlock(b) => &b.source_info, + Block::CodeBlock(b) => &b.source_info, + Block::RawBlock(b) => &b.source_info, + Block::BlockQuote(b) => &b.source_info, + Block::OrderedList(b) => &b.source_info, + Block::BulletList(b) => &b.source_info, + Block::DefinitionList(b) => &b.source_info, + Block::Header(b) => &b.source_info, + Block::HorizontalRule(b) => &b.source_info, + Block::Table(b) => &b.source_info, + Block::Figure(b) => &b.source_info, + Block::Div(b) => &b.source_info, + Block::BlockMetadata(b) => &b.source_info, + Block::NoteDefinitionPara(b) => &b.source_info, + Block::NoteDefinitionFencedBlock(b) => &b.source_info, + Block::CaptionBlock(b) => &b.source_info, + Block::Custom(b) => &b.source_info, + } + } +} + pub type Blocks = Vec; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -156,3 +182,79 @@ pub struct CaptionBlock { pub content: Inlines, pub source_info: quarto_source_map::SourceInfo, } + +#[cfg(test)] +mod tests { + use super::*; + use quarto_source_map::{FileId, SourceInfo}; + + fn test_si(file: usize, start: usize, end: usize) -> SourceInfo { + SourceInfo::original(FileId(file), start, end) + } + + #[test] + fn source_info_plain() { + let si = test_si(0, 
0, 10); + let block = Block::Plain(Plain { + content: vec![], + source_info: si.clone(), + }); + assert_eq!(block.source_info(), &si); + } + + #[test] + fn source_info_paragraph() { + let si = test_si(1, 10, 20); + let block = Block::Paragraph(Paragraph { + content: vec![], + source_info: si.clone(), + }); + assert_eq!(block.source_info(), &si); + } + + #[test] + fn source_info_codeblock() { + let si = test_si(2, 20, 30); + let block = Block::CodeBlock(CodeBlock { + attr: crate::attr::empty_attr(), + text: String::new(), + source_info: si.clone(), + attr_source: AttrSourceInfo::empty(), + }); + assert_eq!(block.source_info(), &si); + } + + #[test] + fn source_info_header() { + let si = test_si(3, 30, 40); + let block = Block::Header(Header { + level: 1, + attr: crate::attr::empty_attr(), + content: vec![], + source_info: si.clone(), + attr_source: AttrSourceInfo::empty(), + }); + assert_eq!(block.source_info(), &si); + } + + #[test] + fn source_info_div() { + let si = test_si(4, 40, 50); + let block = Block::Div(Div { + attr: crate::attr::empty_attr(), + content: vec![], + source_info: si.clone(), + attr_source: AttrSourceInfo::empty(), + }); + assert_eq!(block.source_info(), &si); + } + + #[test] + fn source_info_horizontal_rule() { + let si = test_si(5, 50, 53); + let block = Block::HorizontalRule(HorizontalRule { + source_info: si.clone(), + }); + assert_eq!(block.source_info(), &si); + } +} diff --git a/crates/quarto-pandoc-types/src/config_value.rs b/crates/quarto-pandoc-types/src/config_value.rs index 6856e591b..77d75f61d 100644 --- a/crates/quarto-pandoc-types/src/config_value.rs +++ b/crates/quarto-pandoc-types/src/config_value.rs @@ -44,7 +44,7 @@ fn inlines_to_plain_text(inlines: &[Inline]) -> String { Inline::Math(m) => text.push_str(&m.text), Inline::RawInline(_) => {} // Skip raw content Inline::Shortcode(_) => {} // Skip shortcodes - Inline::Attr(_, _) => {} // Skip attribute nodes + Inline::Attr(_) => {} // Skip attribute nodes Inline::Insert(i) => 
text.push_str(&inlines_to_plain_text(&i.content)), Inline::Delete(_) => {} // Skip deleted content Inline::Highlight(h) => text.push_str(&inlines_to_plain_text(&h.content)), diff --git a/crates/quarto-pandoc-types/src/inline.rs b/crates/quarto-pandoc-types/src/inline.rs index 98f246eef..a7774b16d 100644 --- a/crates/quarto-pandoc-types/src/inline.rs +++ b/crates/quarto-pandoc-types/src/inline.rs @@ -38,7 +38,7 @@ pub enum Inline { NoteReference(NoteReference), // this is used to represent commonmark attributes in the document in places // where they are not directly attached to a block, like in headings and tables - Attr(Attr, AttrSourceInfo), + Attr(InlineAttr), // CriticMarkup-like extensions Insert(Insert), @@ -53,6 +53,41 @@ pub enum Inline { Custom(CustomNode), } +impl Inline { + pub fn source_info(&self) -> &quarto_source_map::SourceInfo { + match self { + Inline::Str(s) => &s.source_info, + Inline::Emph(e) => &e.source_info, + Inline::Underline(u) => &u.source_info, + Inline::Strong(s) => &s.source_info, + Inline::Strikeout(s) => &s.source_info, + Inline::Superscript(s) => &s.source_info, + Inline::Subscript(s) => &s.source_info, + Inline::SmallCaps(s) => &s.source_info, + Inline::Quoted(q) => &q.source_info, + Inline::Cite(c) => &c.source_info, + Inline::Code(c) => &c.source_info, + Inline::Space(s) => &s.source_info, + Inline::SoftBreak(s) => &s.source_info, + Inline::LineBreak(l) => &l.source_info, + Inline::Math(m) => &m.source_info, + Inline::RawInline(r) => &r.source_info, + Inline::Link(l) => &l.source_info, + Inline::Image(i) => &i.source_info, + Inline::Note(n) => &n.source_info, + Inline::Span(s) => &s.source_info, + Inline::Shortcode(s) => &s.source_info, + Inline::NoteReference(n) => &n.source_info, + Inline::Attr(a) => &a.source_info, + Inline::Insert(i) => &i.source_info, + Inline::Delete(d) => &d.source_info, + Inline::Highlight(h) => &h.source_info, + Inline::EditComment(e) => &e.source_info, + Inline::Custom(c) => &c.source_info, + } + } +} 
+ pub type Inlines = Vec; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] @@ -258,6 +293,24 @@ pub struct EditComment { pub attr_source: AttrSourceInfo, } +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct InlineAttr { + pub attr: Attr, + pub attr_source: AttrSourceInfo, + pub source_info: quarto_source_map::SourceInfo, +} + +impl InlineAttr { + pub fn new(attr: Attr, attr_source: AttrSourceInfo) -> Self { + let source_info = attr_source.combine_all().unwrap_or_default(); + Self { + attr, + attr_source, + source_info, + } + } +} + pub trait AsInline { fn as_inline(self) -> Inline; } @@ -309,9 +362,11 @@ impl_as_inline!( EditComment ); -// Note: Attr is omitted from the macro because it has two fields (Attr, AttrSourceInfo) -// and the macro doesn't support that pattern. Inline::Attr already IS an inline, -// so it doesn't need AsInline impl - the generic impl for Inline handles it. +impl AsInline for InlineAttr { + fn as_inline(self) -> Inline { + Inline::Attr(self) + } +} pub fn is_empty_target(target: &Target) -> bool { target.0.is_empty() && target.1.is_empty() @@ -1319,4 +1374,108 @@ mod tests { }; assert_eq!(note.content.len(), 1); } + + // === Inline::source_info() tests === + + fn test_si(file: usize, start: usize, end: usize) -> quarto_source_map::SourceInfo { + quarto_source_map::SourceInfo::original(quarto_source_map::FileId(file), start, end) + } + + #[test] + fn source_info_str() { + let si = test_si(0, 0, 5); + let inline = Inline::Str(Str { + text: "hello".into(), + source_info: si.clone(), + }); + assert_eq!(inline.source_info(), &si); + } + + #[test] + fn source_info_space() { + let si = test_si(0, 5, 6); + let inline = Inline::Space(Space { + source_info: si.clone(), + }); + assert_eq!(inline.source_info(), &si); + } + + #[test] + fn source_info_shortcode() { + let si = test_si(1, 10, 30); + let inline = Inline::Shortcode(crate::shortcode::Shortcode { + is_escaped: false, + name: 
"include".into(), + positional_args: vec![], + keyword_args: std::collections::HashMap::new(), + source_info: si.clone(), + }); + assert_eq!(inline.source_info(), &si); + } + + #[test] + fn source_info_attr_empty() { + // Empty AttrSourceInfo → source_info is SourceInfo::default() + let inline = Inline::Attr(InlineAttr::new( + (String::new(), vec![], LinkedHashMap::new()), + AttrSourceInfo::empty(), + )); + assert_eq!( + inline.source_info(), + &quarto_source_map::SourceInfo::default() + ); + } + + #[test] + fn source_info_attr_with_id() { + // AttrSourceInfo with an id → precomputed source_info matches the id's SourceInfo + let id_si = test_si(2, 10, 15); + let attr_source = AttrSourceInfo { + id: Some(id_si.clone()), + classes: vec![], + attributes: vec![], + }; + let inline = Inline::Attr(InlineAttr::new( + ("myid".into(), vec![], LinkedHashMap::new()), + attr_source, + )); + assert_eq!(inline.source_info(), &id_si); + } + + #[test] + fn source_info_attr_with_id_and_classes() { + // AttrSourceInfo with id + class → precomputed source_info is a Concat + let id_si = test_si(3, 10, 15); + let class_si = test_si(3, 16, 25); + let attr_source = AttrSourceInfo { + id: Some(id_si), + classes: vec![Some(class_si)], + attributes: vec![], + }; + let inline = Inline::Attr(InlineAttr::new( + ("myid".into(), vec!["myclass".into()], LinkedHashMap::new()), + attr_source, + )); + // The combined source_info should be a Concat with 2 pieces + match inline.source_info() { + quarto_source_map::SourceInfo::Concat { pieces } => { + assert_eq!(pieces.len(), 2); + } + other => panic!("Expected Concat, got {:?}", other), + } + } + + #[test] + fn source_info_link() { + let si = test_si(4, 0, 30); + let inline = Inline::Link(Link { + attr: (String::new(), vec![], LinkedHashMap::new()), + content: vec![], + target: ("url".into(), String::new()), + source_info: si.clone(), + attr_source: AttrSourceInfo::empty(), + target_source: TargetSourceInfo::empty(), + }); + 
assert_eq!(inline.source_info(), &si); + } } diff --git a/crates/quarto-pandoc-types/src/lib.rs b/crates/quarto-pandoc-types/src/lib.rs index 522161bd2..aa764ddfc 100644 --- a/crates/quarto-pandoc-types/src/lib.rs +++ b/crates/quarto-pandoc-types/src/lib.rs @@ -33,9 +33,9 @@ pub use caption::Caption; pub use custom::{CustomNode, Slot}; pub use inline::{ AsInline, Citation, CitationMode, Cite, Code, Delete, EditComment, Emph, Highlight, Image, - Inline, Inlines, Insert, LineBreak, Link, Math, MathType, Note, NoteReference, QuoteType, - Quoted, RawInline, SmallCaps, SoftBreak, Space, Span, Str, Strikeout, Strong, Subscript, - Superscript, Target, Underline, is_empty_target, make_cite_inline, make_span_inline, + Inline, InlineAttr, Inlines, Insert, LineBreak, Link, Math, MathType, Note, NoteReference, + QuoteType, Quoted, RawInline, SmallCaps, SoftBreak, Space, Span, Str, Strikeout, Strong, + Subscript, Superscript, Target, Underline, is_empty_target, make_cite_inline, make_span_inline, }; pub use list::{ListAttributes, ListNumberDelim, ListNumberStyle}; pub use meta::{Meta, MetaValue}; diff --git a/crates/quarto-test/src/spec.rs b/crates/quarto-test/src/spec.rs index 0e653a5fb..b132381e7 100644 --- a/crates/quarto-test/src/spec.rs +++ b/crates/quarto-test/src/spec.rs @@ -191,6 +191,11 @@ fn parse_format_spec(format: &str, value: &Value, _input_path: &Path) -> Result< assertions.push(Box::new(ShouldError::new())); } "printsMessage" => { + // Matches Q1 semantics (tests/smoke/smoke-all.test.ts: + // resolveTestSpecs): printsMessage alone does NOT suppress + // the default noErrorsOrWarnings. Fixtures that expect + // messages pair printsMessage with an explicit + // noErrors / noErrorsOrWarnings / shouldError. 
// Support both single object and array of printsMessage checks if let Some(arr) = assertion_value.as_sequence() { for item in arr { diff --git a/crates/quarto/tests/smoke-all/includes/basic/_child.qmd b/crates/quarto/tests/smoke-all/includes/basic/_child.qmd new file mode 100644 index 000000000..0cf05cfa0 --- /dev/null +++ b/crates/quarto/tests/smoke-all/includes/basic/_child.qmd @@ -0,0 +1 @@ +This line contains BASIC-CHILD-MARKER-XYZ from the included file. diff --git a/crates/quarto/tests/smoke-all/includes/basic/basic.qmd b/crates/quarto/tests/smoke-all/includes/basic/basic.qmd new file mode 100644 index 000000000..2b024bf58 --- /dev/null +++ b/crates/quarto/tests/smoke-all/includes/basic/basic.qmd @@ -0,0 +1,16 @@ +--- +title: Basic Include Expansion +format: html +_quarto: + tests: + html: + noErrors: true + ensureFileRegexMatches: + - ["BASIC-CHILD-MARKER-XYZ"] +--- + +Parent content before include. + +{{< include _child.qmd >}} + +Parent content after include. diff --git a/crates/quarto/tests/smoke-all/includes/circular/_loop.qmd b/crates/quarto/tests/smoke-all/includes/circular/_loop.qmd new file mode 100644 index 000000000..9902e7397 --- /dev/null +++ b/crates/quarto/tests/smoke-all/includes/circular/_loop.qmd @@ -0,0 +1 @@ +{{< include circular.qmd >}} diff --git a/crates/quarto/tests/smoke-all/includes/circular/circular.qmd b/crates/quarto/tests/smoke-all/includes/circular/circular.qmd new file mode 100644 index 000000000..74f29d795 --- /dev/null +++ b/crates/quarto/tests/smoke-all/includes/circular/circular.qmd @@ -0,0 +1,13 @@ +--- +title: Circular Include +format: html +_quarto: + tests: + html: + noErrors: default + printsMessage: + level: WARN + regex: "Circular include" +--- + +{{< include _loop.qmd >}} diff --git a/crates/quarto/tests/smoke-all/includes/code-cell/_cell.qmd b/crates/quarto/tests/smoke-all/includes/code-cell/_cell.qmd new file mode 100644 index 000000000..30ef1403b --- /dev/null +++ 
b/crates/quarto/tests/smoke-all/includes/code-cell/_cell.qmd @@ -0,0 +1,4 @@ +```{python} +# code-marker-from-child +print('hello from the included cell') +``` diff --git a/crates/quarto/tests/smoke-all/includes/code-cell/code-cell.qmd b/crates/quarto/tests/smoke-all/includes/code-cell/code-cell.qmd new file mode 100644 index 000000000..a62df9dd2 --- /dev/null +++ b/crates/quarto/tests/smoke-all/includes/code-cell/code-cell.qmd @@ -0,0 +1,18 @@ +--- +title: Code Cell Through Include +format: html +execute: + eval: false +_quarto: + tests: + html: + noErrors: true + ensureFileRegexMatches: + - ["code-marker-from-child"] +--- + +Parent before. + +{{< include _cell.qmd >}} + +Parent after. diff --git a/crates/quarto/tests/smoke-all/includes/crossref/_figure.qmd b/crates/quarto/tests/smoke-all/includes/crossref/_figure.qmd new file mode 100644 index 000000000..35eee3af9 --- /dev/null +++ b/crates/quarto/tests/smoke-all/includes/crossref/_figure.qmd @@ -0,0 +1 @@ +![Included figure caption.](placeholder.png){#fig-included} diff --git a/crates/quarto/tests/smoke-all/includes/crossref/crossref.qmd b/crates/quarto/tests/smoke-all/includes/crossref/crossref.qmd new file mode 100644 index 000000000..b1bdf4652 --- /dev/null +++ b/crates/quarto/tests/smoke-all/includes/crossref/crossref.qmd @@ -0,0 +1,14 @@ +--- +title: Crossref Through Include +format: html +_quarto: + tests: + html: + noErrors: true + ensureHtmlElements: + - ["a[href='#fig-included']"] +--- + +{{< include _figure.qmd >}} + +See @fig-included for the figure defined in the included file. 
diff --git a/crates/quarto/tests/smoke-all/includes/missing/missing.qmd b/crates/quarto/tests/smoke-all/includes/missing/missing.qmd new file mode 100644 index 000000000..aef070daf --- /dev/null +++ b/crates/quarto/tests/smoke-all/includes/missing/missing.qmd @@ -0,0 +1,13 @@ +--- +title: Missing Include +format: html +_quarto: + tests: + html: + noErrors: default + printsMessage: + level: WARN + regex: "Include file not found" +--- + +{{< include does-not-exist.qmd >}} diff --git a/hub-client/e2e/helpers/smokeAllDiscovery.ts b/hub-client/e2e/helpers/smokeAllDiscovery.ts index ff899214d..10b197d0c 100644 --- a/hub-client/e2e/helpers/smokeAllDiscovery.ts +++ b/hub-client/e2e/helpers/smokeAllDiscovery.ts @@ -185,6 +185,11 @@ function parseFormatSpec( assertions.push({ type: 'shouldError' }); break; case 'printsMessage': { + // Matches Q1 semantics (tests/smoke/smoke-all.test.ts: + // resolveTestSpecs): printsMessage alone does NOT suppress the + // default noErrorsOrWarnings. Fixtures that expect messages pair + // printsMessage with an explicit noErrors / noErrorsOrWarnings / + // shouldError. if (!options.skipPrintsMessage) { const items = Array.isArray(assertionValue) ? assertionValue : [assertionValue]; for (const item of items) { diff --git a/hub-client/src/services/smokeAll.wasm.test.ts b/hub-client/src/services/smokeAll.wasm.test.ts index bcb376c1e..d1e826fbb 100644 --- a/hub-client/src/services/smokeAll.wasm.test.ts +++ b/hub-client/src/services/smokeAll.wasm.test.ts @@ -247,6 +247,11 @@ function parseFormatSpec(format: string, value: Record, options assertions.push(assertShouldError); break; case 'printsMessage': { + // Matches Q1 semantics (tests/smoke/smoke-all.test.ts: + // resolveTestSpecs): printsMessage alone does NOT suppress the + // default noErrorsOrWarnings. Fixtures that expect messages pair + // printsMessage with an explicit noErrors / noErrorsOrWarnings / + // shouldError. 
if (!options.skipPrintsMessage) { const items = Array.isArray(assertionValue) ? assertionValue : [assertionValue]; for (const item of items) {