Skip to content

Add adjoints for texture_sample for WP_TEXTURE_ADDRESS_BORDER#1305

Open
eigenvivek wants to merge 5 commits intoNVIDIA:mainfrom
eigenvivek:texture-sample-adjoint
Open

Add adjoints for texture_sample for WP_TEXTURE_ADDRESS_BORDER#1305
eigenvivek wants to merge 5 commits intoNVIDIA:mainfrom
eigenvivek:texture-sample-adjoint

Conversation

@eigenvivek
Copy link
Copy Markdown

@eigenvivek eigenvivek commented Mar 18, 2026

Description

Implements adjoints such that texture_sample is differentiable wrt the sampling coordinates. Closes #1301.

Apologies for the double PR - trying to fix the multichannel issues accidentally closed my initial PR.

Checklist

  • I am familiar with the Contributing Guidelines.
  • New or existing tests cover these changes.
  • The documentation is up to date with these changes.

Test plan

I benchmarked correctness relative to torch's grid_sample:

import numpy as np
import torch
import torch.nn.functional as F
import warp as wp

wp.init()


@wp.kernel
def _sample_1d(tex: wp.Texture1D, pos: wp.array(dtype=float), out: wp.array(dtype=float)):
    tid = wp.tid()
    out[tid] = wp.texture_sample(tex, pos[tid], dtype=float)


@wp.kernel
def _sample_2d(tex: wp.Texture2D, pos: wp.array(dtype=wp.vec2f), out: wp.array(dtype=float)):
    tid = wp.tid()
    out[tid] = wp.texture_sample(tex, pos[tid], dtype=float)


@wp.kernel
def _sample_3d(tex: wp.Texture3D, pos: wp.array(dtype=wp.vec3f), out: wp.array(dtype=float)):
    tid = wp.tid()
    out[tid] = wp.texture_sample(tex, pos[tid], dtype=float)


def warp_grad(tex_cls, kernel, pos_dtype, data, coord):
    tex = tex_cls(data, normalized_coords=False, device="cpu")
    c = [coord] if not hasattr(coord, "__len__") else [pos_dtype(*coord)]
    pos = wp.array(c, dtype=pos_dtype, requires_grad=True, device="cpu")
    out = wp.zeros(1, dtype=float, requires_grad=True, device="cpu")
    tape = wp.Tape()
    with tape:
        wp.launch(kernel, dim=1, inputs=[tex, pos], outputs=[out], device="cpu")
    out.grad = wp.ones(1, dtype=float, device="cpu")
    tape.backward()
    return pos.grad.numpy()[0]


def torch_grad_1d(data, u):
    W = data.shape[0]
    t = torch.tensor(data, dtype=torch.float32)[None, None, None]
    grid = torch.tensor([[[[(u / W) * 2 - 1, 0.0]]]], dtype=torch.float32, requires_grad=True)
    F.grid_sample(t, grid, mode="bilinear", padding_mode="border", align_corners=False).backward()
    return float(grid.grad[0, 0, 0, 0] * (2 / W))


def torch_grad_2d(data, coord):
    H, W = data.shape
    u, v = coord
    t = torch.tensor(data, dtype=torch.float32)[None, None]
    grid = torch.tensor([[[[(u / W) * 2 - 1, (v / H) * 2 - 1]]]], dtype=torch.float32, requires_grad=True)
    F.grid_sample(t, grid, mode="bilinear", padding_mode="border", align_corners=False).backward()
    g = grid.grad[0, 0, 0].numpy()
    return np.array([g[0] * (2 / W), g[1] * (2 / H)])


def torch_grad_3d(data, coord):
    D, H, W = data.shape
    u, v, w = coord
    t = torch.tensor(data, dtype=torch.float32)[None, None]
    grid = torch.tensor(
        [[[[[(u / W) * 2 - 1, (v / H) * 2 - 1, (w / D) * 2 - 1]]]]], dtype=torch.float32, requires_grad=True
    )
    F.grid_sample(t, grid, mode="bilinear", padding_mode="border", align_corners=False).backward()
    g = grid.grad[0, 0, 0, 0].numpy()
    return np.array([g[0] * (2 / W), g[1] * (2 / H), g[2] * (2 / D)])


def print_table(title, rows):
    CW = max(len(r[0]) for r in rows)
    VW = max(max(len(str(np.round(np.atleast_1d(r[1]), 5))), len(str(np.round(np.atleast_1d(r[2]), 5)))) for r in rows)
    sep = "-" * (CW + 2 * VW + 20)
    print(f"\n{title}\n{sep}")
    print(f"{'coord':<{CW}}  {'warp':>{VW}}  {'torch':>{VW}}  {'max_err':>10}")
    print(sep)
    for label, wg, tg in rows:
        wg, tg = np.atleast_1d(wg), np.atleast_1d(tg)
        print(
            f"{label:<{CW}}  {str(np.round(wg, 5)):>{VW}}  {str(np.round(tg, 5)):>{VW}}  {np.abs(wg - tg).max():>10.2e}"
        )
    print(sep)


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    d1 = rng.standard_normal(16).astype(np.float32)
    d2 = rng.standard_normal((6, 10)).astype(np.float32)
    d3 = rng.standard_normal((8, 6, 10)).astype(np.float32)

    print_table(
        "1D",
        [
            (f"u={u}", warp_grad(wp.Texture1D, _sample_1d, float, d1, u), torch_grad_1d(d1, u))
            for u in [0.1, 1.5, 7.3, 15.1, 15.9]
        ],
    )
    print_table(
        "2D",
        [
            (f"uv={c}", warp_grad(wp.Texture2D, _sample_2d, wp.vec2f, d2, c), torch_grad_2d(d2, c))
            for c in [(0.1, 0.1), (1.5, 1.5), (3.7, 2.3), (9.1, 5.1), (9.9, 5.9)]
        ],
    )
    print_table(
        "3D",
        [
            (f"uvw={c}", warp_grad(wp.Texture3D, _sample_3d, wp.vec3f, d3, c), torch_grad_3d(d3, c))
            for c in [(0.1, 0.1, 0.1), (1.5, 1.5, 1.5), (2.3, 3.7, 1.1), (9.1, 5.1, 7.1), (9.9, 5.9, 7.9)]
        ],
    )

which outputs:

1D
----------------------------------------------
coord         warp       torch     max_err
----------------------------------------------
u=0.1         [0.]       [-0.]    0.00e+00
u=1.5    [0.77253]   [0.77253]    0.00e+00
u=7.3   [-0.35692]  [-0.35692]    0.00e+00
u=15.1   [0.51364]   [0.51364]    0.00e+00
u=15.9        [0.]        [0.]    0.00e+00
----------------------------------------------

2D
-----------------------------------------------------------------------
coord                         warp                torch     max_err
-----------------------------------------------------------------------
uv=(0.1, 0.1)              [0. 0.]            [ 0. -0.]    0.00e+00
uv=(1.5, 1.5)    [0.464   0.79211]    [0.464   0.79211]    0.00e+00
uv=(3.7, 2.3)  [-2.44796  0.9687 ]  [-2.44796  0.9687 ]    0.00e+00
uv=(9.1, 5.1)  [-2.12048 -0.0399 ]  [-2.12048 -0.0399 ]    2.47e-06
uv=(9.9, 5.9)              [0. 0.]              [0. 0.]    0.00e+00
-----------------------------------------------------------------------

3D
-----------------------------------------------------------------------------------------------
coord                                        warp                         torch     max_err
-----------------------------------------------------------------------------------------------
uvw=(0.1, 0.1, 0.1)                    [0. 0. 0.]                    [0. 0. 0.]    0.00e+00
uvw=(1.5, 1.5, 1.5)  [-0.69482  0.00284 -1.18373]  [-0.69482  0.00284 -1.18373]    0.00e+00
uvw=(2.3, 3.7, 1.1)  [ 0.2395  -0.41915  1.72461]  [ 0.2395  -0.41915  1.72461]    6.56e-07
uvw=(9.1, 5.1, 7.1)  [-0.77744  0.13602  0.39862]  [-0.77744  0.13602  0.39861]    2.06e-06
uvw=(9.9, 5.9, 7.9)                    [0. 0. 0.]                 [-0. -0. -0.]    0.00e+00
-----------------------------------------------------------------------------------------------

I also built an differentiable X-ray renderer with warp and verified that the gradients flow backwards through texture sampling, allowing you to estimate camera poses. Code isn't on github yet, but happy to push it if that's useful.

warp

New feature / enhancement

import numpy as np
import warp as wp

wp.init()


@wp.kernel
def sample(tex: wp.Texture1D, pos: wp.array(dtype=float), out: wp.array(dtype=float)):
    tid = wp.tid()
    out[tid] = wp.texture_sample(tex, pos[tid], dtype=float)


data = np.arange(16, dtype=np.float32)
tex = wp.Texture1D(data, normalized_coords=False)
pos = wp.array([4.0], dtype=float, requires_grad=True)
out = wp.zeros(1, dtype=float, requires_grad=True)

tape = wp.Tape()
with tape:
    wp.launch(sample, dim=1, inputs=[tex, pos], outputs=[out])
out.grad = wp.ones(1, dtype=float)
tape.backward()

print("out:", out.numpy())            # [3.5]
print("pos.grad:", pos.grad.numpy())  # [1.]

Summary by CodeRabbit

  • New Features

    • Texture sampling operations now support automatic differentiation and gradient computation
    • Added configuration options for texture sampling: filter modes, coordinate normalization, and boundary address modes
  • Tests

    • Comprehensive test coverage for texture sampling gradients across 1D, 2D, and 3D operations

@copy-pr-bot
Copy link
Copy Markdown

copy-pr-bot bot commented Mar 18, 2026

This pull request requires additional validation before any workflows can run on NVIDIA's runners.

Pull request vetters can view their responsibilities here.

Contributors can view more details about this message here.

@coderabbitai
Copy link
Copy Markdown

coderabbitai bot commented Mar 18, 2026

📝 Walkthrough

Walkthrough

This change implements differentiable texture sampling for Warp by extending texture descriptor structs with sampling configuration fields (filter mode, address modes), replacing gradient stub implementations with actual gradient computation for 1D/2D/3D texture sampling, and adding comprehensive test coverage for various sampling and gradient scenarios.

Changes

Cohort / File(s) Summary
Differentiability Metadata
warp/_src/builtins.py
Updated is_differentiable flag from False to True for all texture_sample overloads (1D, 2D, 3D variants).
Texture Struct Extensions
warp/_src/texture.py, warp/native/texture.h
Extended texture1d_t, texture2d_t, and texture3d_t structs with new fields: filter_mode, use_normalized_coords, and per-axis address modes (address_mode_u, address_mode_v, address_mode_w). Updated ctypes and C++ constructors to accept and store these parameters.
Gradient Implementation
warp/native/texture.h
Implemented adj_texture_sample overloads for 1D/2D/3D that compute actual texture gradients via bilinear/trilinear interpolation. Added GPU-only fetch helpers and conditional gradient accumulation with early returns for CLOSEST filter mode and validation of BORDER address mode.
Test Coverage
warp/tests/cuda/test_texture.py
Added sampling kernels (sample_1d, sample_2d, sample_3d), gradient helper functions (with normalized coordinate variants), and 26 new regression tests covering boundary cases, filter modes, linear signals, uniform data, and multi-channel sampling.

Sequence Diagram

sequenceDiagram
    participant Kernel as Sampling Kernel
    participant Forward as Forward Pass
    participant Texture as Texture Object
    participant Tape as Warp Tape
    participant Backward as Backward Pass
    participant Adjoint as Adjoint Texture Sampler

    Kernel->>Forward: launch with texture, coords
    Forward->>Texture: sample at u/v/w
    Texture-->>Forward: returns scalar/vec2f/vec4f
    Forward-->>Tape: records operation

    Tape->>Backward: tape.backward()
    Backward->>Adjoint: adj_texture_sample(tex, coords, adj_ret)
    
    alt filter_mode == CLOSEST
        Adjoint-->>Backward: skip gradient (early return)
    else filter_mode == LINEAR
        alt address_mode != BORDER
            Adjoint-->>Backward: error, skip gradient
        else
            Adjoint->>Texture: fetch neighbor samples
            Adjoint->>Adjoint: compute bilinear/trilinear gradients
            Adjoint->>Adjoint: accumulate adj_u/adj_v/adj_w
            Adjoint-->>Backward: returns accumulated gradients
        end
    end
    
    Backward-->>Kernel: propagates gradients to input coordinates
Loading

Estimated code review effort

🎯 4 (Complex) | ⏱️ ~50 minutes

🚥 Pre-merge checks | ✅ 4 | ❌ 1

❌ Failed checks (1 warning)

Check name Status Explanation Resolution
Docstring Coverage ⚠️ Warning Docstring coverage is 45.28% which is insufficient. The required threshold is 80.00%. Write docstrings for the functions missing them to satisfy the coverage threshold.
✅ Passed checks (4 passed)
Check name Status Explanation
Description Check ✅ Passed Check skipped - CodeRabbit’s high-level summary is enabled.
Title check ✅ Passed The title accurately describes the main change: implementing adjoints for texture_sample with border address mode, which directly addresses the PR objectives.
Linked Issues check ✅ Passed All coding requirements from issue #1301 are met: adjoints implemented for texture_sample (1D/2D/3D), gradients w.r.t. coordinates work correctly, and comprehensive tests validate numerical correctness.
Out of Scope Changes check ✅ Passed All changes directly support texture_sample adjoint implementation: metadata updates marking functions differentiable, struct extensions for sampling configuration, adjoint computations, and comprehensive test coverage.

✏️ Tip: You can configure your own custom pre-merge checks in the settings.

✨ Finishing Touches
🧪 Generate unit tests (beta)
  • Create PR with unit tests
📝 Coding Plan
  • Generate coding plan for human review comments

Comment @coderabbitai help to get the list of available commands and usage tips.

@greptile-apps
Copy link
Copy Markdown

greptile-apps bot commented Mar 18, 2026

Greptile Summary

This PR adds differentiability to texture_sample for 1D, 2D, and 3D textures by implementing adjoint functions that compute coordinate gradients via finite differences on adjacent texels. The Python-side ctype structs are extended to carry filter_mode, use_normalized_coords, and per-axis address_mode fields so the native adjoint can correctly branch on them. All five texture_sample overloads in builtins.py are marked is_differentiable=True, and a thorough test suite covers boundary behaviour, linear-signal gradients, multi-channel textures, and normalized coordinates.

Two issues need attention before merging:

  • Per-thread CUDA printf flood — the address-mode mismatch check calls printf unconditionally in device code. Every CUDA thread in a backward-pass kernel will emit the message, potentially producing millions of identical lines and overflowing CUDA's fixed printf buffer (~1 MB). This should be guarded to print only once (e.g., threadIdx.x == 0 && blockIdx.x == 0) or, ideally, validated on the Python/CPU side before the backward kernel is launched. The same #if defined(__CUDA_ARCH__) / #else block is also redundant since both branches are character-for-character identical. This occurs in all three adjoint overloads (1D, 2D, 3D).

  • Boundary gradient incorrect even for BORDER mode — the guard x0 >= 0 && x1 < tex.width (and its 2D/3D equivalents for each axis) zeros the gradient whenever the sample straddles the texture boundary. For BORDER (zero-fill) mode, when x0 = -1 (border → 0) and x1 = 0 (valid), the correct gradient is v[0], not 0. The block comment in texture.h claims "which is correct for BORDER mode" — this is inaccurate. The tests pass because they benchmark against PyTorch's padding_mode="border", which is actually clamp-to-edge (not zero-fill), and clamp semantics do give zero at boundary-straddling positions. The actual Warp WP_TEXTURE_ADDRESS_BORDER semantics (zero outside) would produce a nonzero gradient.

Confidence Score: 2/5

  • PR introduces a useful feature but has a runtime correctness bug (boundary gradients incorrect even for BORDER mode) and a GPU performance/reliability issue (per-thread printf) that should be fixed before merging.
  • The adjoint math is correct for interior positions, but two issues reduce confidence: (1) the per-thread CUDA printf will cause severe output flooding and performance degradation in any real CUDA workload using non-BORDER textures; (2) the boundary gradient is zero where it should be nonzero for BORDER mode, and the code comment and tests both incorrectly validate this as correct by benchmarking against PyTorch CLAMP rather than PyTorch BORDER (zeros) semantics.
  • warp/native/texture.h — both the printf-per-thread issue and the boundary gradient logic bug are in this file, affecting all three adjoint overloads (1D/2D/3D). warp/tests/cuda/test_texture.py — boundary tests should be updated to validate against true BORDER zero-fill semantics rather than CLAMP semantics.

Important Files Changed

Filename Overview
warp/native/texture.h Core of the PR: adds adjoint implementations for 1D/2D/3D texture_sample. Two issues found: (1) per-thread CUDA printf on address-mode mismatch will overflow the printf buffer and degrade performance; (2) the boundary-straddling gradient is incorrectly zero even for BORDER mode, and the comment claiming otherwise is misleading.
warp/_src/texture.py Correctly exposes filter_mode, use_normalized_coords, and per-axis address_mode fields from the three ctype structs and passes them through to the native structs. No issues found.
warp/_src/builtins.py All five texture_sample overloads are correctly flipped to is_differentiable=True. No issues found.
warp/tests/cuda/test_texture.py Comprehensive gradient tests covering boundary zeros, linear signal gradients, multi-channel textures, and normalized/unnormalized coordinates for all three dimensionalities. Tests validate against torch CLAMP behavior (padding_mode="border") rather than true BORDER zero-fill semantics, which means they pass even for positions where the gradient is incorrectly zeroed at the boundary.

Sequence Diagram

sequenceDiagram
    participant User as User Code
    participant Tape as wp.Tape
    participant Fwd as texture_sample (forward)
    participant HW as CUDA Texture HW / CPU Texture
    participant Adj as adj_texture_sample (backward)

    User->>Tape: record kernel launch
    Tape->>Fwd: texture_sample(tex, u)
    Fwd->>HW: hardware bilinear fetch (address_mode=BORDER)
    HW-->>Fwd: interpolated value
    Fwd-->>User: output value

    User->>Tape: tape.backward()
    Tape->>Adj: adj_texture_sample(tex, u, adj_tex, adj_u, adj_ret)

    alt filter_mode == CLOSEST
        Adj-->>Tape: return (gradient = 0)
    else address_mode != BORDER
        Adj->>Adj: printf ERROR (⚠ per-thread on GPU)
        Adj-->>Tape: return (gradient = 0, silently wrong)
    else both neighbours in bounds (x0>=0 && x1<width)
        Adj->>HW: fetch texel at x0 center
        Adj->>HW: fetch texel at x1 center
        HW-->>Adj: v0, v1
        Adj->>Adj: adj_u += (v1 - v0) * adj_ret
        Adj-->>Tape: correct gradient
    else one neighbour out of bounds (⚠ BORDER bug)
        Adj-->>Tape: return 0 (incorrect: should use border=0 for out-of-bounds texel)
    end

    Tape-->>User: pos.grad updated
Loading

Last reviewed commit: "Update"

Copy link
Copy Markdown

@coderabbitai coderabbitai bot left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick comments (2)
warp/tests/cuda/test_texture.py (1)

2834-2895: Consider extracting the inline kernel sample_2d_vec2 to module level.

The kernel sample_2d_vec2 is defined inline within two test functions (test_texture2d_adj_vec2f_linear_x and test_texture2d_adj_vec2f_channels_independent). While this works, Warp will recompile the kernel if it's defined multiple times with the same signature. Moving it to module level alongside sample_1d, sample_2d, sample_3d would avoid potential redundant compilation.

♻️ Suggested refactor

Move the kernel definition to module level (around line 2620):

`@wp.kernel`
def sample_2d_vec2(tex: wp.Texture2D, pos: wp.array(dtype=wp.vec2f), out: wp.array(dtype=wp.vec2f)):
    tid = wp.tid()
    out[tid] = wp.texture_sample(tex, pos[tid], dtype=wp.vec2f)


`@wp.kernel`
def sample_3d_vec2(tex: wp.Texture3D, pos: wp.array(dtype=wp.vec3f), out: wp.array(dtype=wp.vec2f)):
    tid = wp.tid()
    out[tid] = wp.texture_sample(tex, pos[tid], dtype=wp.vec2f)

Then reference them in the test functions instead of inline definitions.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@warp/tests/cuda/test_texture.py` around lines 2834 - 2895, The inline kernel
sample_2d_vec2 is duplicated across two tests causing redundant Warp
recompilation; move its `@wp.kernel` definition to module scope (near where
sample_1d, sample_2d, sample_3d are defined) and import reuse it from both
test_texture2d_adj_vec2f_linear_x and
test_texture2d_adj_vec2f_channels_independent; also add the suggested companion
sample_3d_vec2 at module level if needed, then remove the inline kernel
definitions from the test functions and call the module-level sample_2d_vec2
instead.
warp/native/texture.h (1)

922-1012: Minor inconsistency in CPU path coordinate source.

The gtx_mult/gty_mult are computed from tex.use_normalized_coords (the descriptor), but later the CPU path uses cpu_tex->use_normalized_coords for coordinate transformation. While these should always match (the descriptor is populated from the same source), the inconsistency could cause subtle bugs if they ever diverge.

♻️ Suggested alignment

For consistency, consider using cpu_tex->use_normalized_coords for computing the gradient multipliers in the CPU path:

 `#else`
     if (tex.tex == 0)
         return;
     const Texture* cpu_tex = (const Texture*)tex.tex;
+    float gtx_mult = cpu_tex->use_normalized_coords ? (float)cpu_tex->width : 1.0f;
+    float gty_mult = cpu_tex->use_normalized_coords ? (float)cpu_tex->height : 1.0f;
 
     float coord_u = cpu_tex->use_normalized_coords ? u : (u / (float)cpu_tex->width);

Or alternatively, move the multiplier computation inside the #else block using cpu_tex consistently.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@warp/native/texture.h` around lines 922 - 1012, In adj_texture_sample the CPU
path uses cpu_tex->use_normalized_coords for coordinate transforms but
gtx_mult/gty_mult are computed from tex.use_normalized_coords (descriptor),
creating a mismatch risk; fix by computing gtx_mult and gty_mult inside the
`#else` branch using cpu_tex->use_normalized_coords (or replace usages in the CPU
block to use the previously computed gtx_mult/gty_mult only after assigning them
from cpu_tex), updating the variables referenced in adj_texture_sample so the
CPU path consistently uses cpu_tex->use_normalized_coords.
🤖 Prompt for all review comments with AI agents
Verify each finding against the current code and only fix it if needed.

Nitpick comments:
In `@warp/native/texture.h`:
- Around line 922-1012: In adj_texture_sample the CPU path uses
cpu_tex->use_normalized_coords for coordinate transforms but gtx_mult/gty_mult
are computed from tex.use_normalized_coords (descriptor), creating a mismatch
risk; fix by computing gtx_mult and gty_mult inside the `#else` branch using
cpu_tex->use_normalized_coords (or replace usages in the CPU block to use the
previously computed gtx_mult/gty_mult only after assigning them from cpu_tex),
updating the variables referenced in adj_texture_sample so the CPU path
consistently uses cpu_tex->use_normalized_coords.

In `@warp/tests/cuda/test_texture.py`:
- Around line 2834-2895: The inline kernel sample_2d_vec2 is duplicated across
two tests causing redundant Warp recompilation; move its `@wp.kernel` definition
to module scope (near where sample_1d, sample_2d, sample_3d are defined) and
import reuse it from both test_texture2d_adj_vec2f_linear_x and
test_texture2d_adj_vec2f_channels_independent; also add the suggested companion
sample_3d_vec2 at module level if needed, then remove the inline kernel
definitions from the test functions and call the module-level sample_2d_vec2
instead.

ℹ️ Review info
⚙️ Run configuration

Configuration used: Path: .coderabbit.yml

Review profile: CHILL

Plan: Pro

Run ID: fab35151-95bd-4edd-877a-70e9dda3dc1c

📥 Commits

Reviewing files that changed from the base of the PR and between fe1742c and aa33b59.

📒 Files selected for processing (4)
  • warp/_src/builtins.py
  • warp/_src/texture.py
  • warp/native/texture.h
  • warp/tests/cuda/test_texture.py

@eigenvivek eigenvivek force-pushed the texture-sample-adjoint branch from 841cc7a to 04e939a Compare March 19, 2026 00:28
Copy link
Copy Markdown

@coderabbitai coderabbitai bot left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actionable comments posted: 4

🤖 Prompt for all review comments with AI agents
Verify each finding against the current code and only fix it if needed.

Inline comments:
In `@PUBLICATIONS.md`:
- Around line 11-12: The PR includes unrelated edits to PUBLICATIONS.md (two
added entries for Kamino and ComFree-Sim) while the PR should only implement
adjoints for texture_sample (PR `#1305`); remove the two added publication lines
from this branch (or move them into a separate documentation PR) and update the
PR description/AI summary to stop claiming an entry was "replaced"—it should
state no publication was removed and that the changes are purely unrelated
documentation additions. Ensure references to the texture_sample adjoint work
(the adjoint implementation files/functions) remain untouched and the commit
history contains only code changes relevant to implementing texture_sample
adjoints.

In `@warp/_src/texture.py`:
- Line 10: The warning is being emitted on the default/non-differentiable
constructor path and uses the stdlib warnings.warn; change the logic so the
guard only fires on the differentiable LINEAR sampling path (not on default
CLAMP or CLOSEST constructors) and replace uses of warnings.warn with
warp._src.utils.warn brought in via a lazy import inside texture.py to avoid
circular imports; update the warning sites referenced in the file (the initial
import area and the other warning blocks around the adjoint/differentiable code
paths) to perform a local import like "from warp._src.utils import warn" inside
the function/code path that triggers the warning and call warn(...) instead of
warnings.warn(...), and ensure the condition checks reference address_mode ==
LINEAR (or the differentiable code path) rather than always running on
construction (affecting the three places noted).

In `@warp/native/texture.h`:
- Around line 860-875: The texture sampling adjoints (e.g., texture_sample) only
handle WP_TEXTURE_ADDRESS_BORDER and thus produce incorrect gradients for
CLAMP/WRAP/MIRROR; update the adjoint path to check the texture's address modes
at the start of texture_sample and if any axis is not WP_TEXTURE_ADDRESS_BORDER
either (A) return/raise a clear runtime error or mark the op as
non-differentiable (fail fast), or (B) implement the correct gradient logic for
CLAMP/WRAP/MIRROR; ensure the check covers all adjoint implementations
referenced in this file and emits a clear message referencing the unsupported
mode so callers cannot silently get wrong gradients.

In `@warp/tests/cuda/test_texture.py`:
- Around line 2626-2677: The helpers _grad_1d, _grad_2d, and _grad_3d currently
hardcode normalized_coords=False so gradients only exercise texel-space
sampling; make them accept a normalized_coords parameter (default False) and
pass it into the Texture1D/Texture2D/Texture3D constructors (the Texture* calls)
or alternatively add new test invocations that call the existing helpers with
normalized_coords=True; update any calls to these helpers (or add at least one
new 1D, 2D, and 3D test) to run with normalized_coords=True so the
normalized-coordinate adjoint branch (use_normalized_coords) is exercised.

ℹ️ Review info
⚙️ Run configuration

Configuration used: Path: .coderabbit.yml

Review profile: CHILL

Plan: Pro

Run ID: 21b980a8-befb-4100-b355-9e1d42535156

📥 Commits

Reviewing files that changed from the base of the PR and between 841cc7a and 04e939a.

📒 Files selected for processing (5)
  • PUBLICATIONS.md
  • warp/_src/builtins.py
  • warp/_src/texture.py
  • warp/native/texture.h
  • warp/tests/cuda/test_texture.py

@eigenvivek eigenvivek changed the title Add adjoints for texture_sample Add adjoints for texture_sample for WP_TEXTURE_ADDRESS_BORDER Mar 19, 2026
eigenvivek and others added 5 commits March 18, 2026 21:41
Signed-off-by: eigenvivek <vivekg@mit.edu>
Add Kamino (GPU multi-body simulation) and ComFree-Sim
(GPU-parallelized contact physics engine) to the 2026 publications.

Signed-off-by: Eric Shi <ershi@nvidia.com>
Signed-off-by: eigenvivek <vivekg@mit.edu>
Signed-off-by: eigenvivek <vivekg@mit.edu>
Signed-off-by: eigenvivek <vivekg@mit.edu>
Signed-off-by: eigenvivek <vivekg@mit.edu>
@eigenvivek eigenvivek force-pushed the texture-sample-adjoint branch from 04e939a to 1d30e72 Compare March 19, 2026 01:41
Copy link
Copy Markdown

@coderabbitai coderabbitai bot left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actionable comments posted: 1

🧹 Nitpick comments (1)
warp/tests/cuda/test_texture.py (1)

2608-3056: The new RGBA and 1D multi-channel adjoint paths are still untested.

warp/native/texture.h now adds dedicated fetch_1d/2d/3d branches for multi-channel adjoints, including vec4f, but this suite only exercises scalar and a couple of vec2f cases. Please add at least one 1D multi-channel gradient case and one vec4f regression so the new channel-selection code does not stay uncovered.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@warp/tests/cuda/test_texture.py` around lines 2608 - 3056, The test suite
misses coverage for the new multi-channel adjoint branches (1D multi-channel and
vec4f) added in native/texture.h; add two tests that exercise these paths: (1) a
1D multi-channel gradient test that creates a Texture1D with shape (W, C) or
similar multi-channel data, seeds pos via the existing sample_1d kernel and uses
a helper like _grad_1d (or a new _grad_1d_multi) to assert channel-wise gradient
behavior (e.g., one channel linear in x, others constant), and (2) a vec4f
regression that uses Texture2D/Texture1D with dtype=vec4f, a kernel like
sample_2d_vec2/sample_2d (adapted to vec4f) or a new sample_1d_vec4 to sample
dtype=wp.vec4f, seeds out.grad with a vec4f that selects channels, and asserts
the expected gradient values; reference sample_1d, sample_2d_vec2, _grad_1d and
_grad_2d helpers to locate where to add these tests and mirror existing patterns
(texture creation, normalized_coords/filter_mode/address_mode, wp.Tape,
wp.launch, out.grad, tape.backward, and numpy assertions).
🤖 Prompt for all review comments with AI agents
Verify each finding against the current code and only fix it if needed.

Inline comments:
In `@warp/native/texture.h`:
- Around line 943-951: The current guard zeroes adj_u when either neighbor is
out-of-bounds; instead compute the boundary contribution by using the actual
fetched corner values with out-of-range samples treated as border-zero (so CPU
forward's mix with zero is mirrored). Concretely, in the block that computes gtx
for adj_u (using x0, x1, tex.width, tex.use_normalized_coords, gtx_mult, adj_ret
and texture_sample_helper<T>::fetch_1d), remove the early-reject condition and
compute per-channel values v0 and v1 where v0 = (x0 in range ? fetch_1d(..., u0,
c) : 0.0f) and v1 = (x1 in range ? fetch_1d(..., u1, c) : 0.0f); then form gtx
+= (v1 - v0) * ((const float*)&adj_ret)[c] and update adj_u += gtx_mult * gtx so
boundary derivatives reflect blended-in border zeros; apply the same change to
the analogous blocks for adj_v/adj_w referenced in the comment ranges.

---

Nitpick comments:
In `@warp/tests/cuda/test_texture.py`:
- Around line 2608-3056: The test suite misses coverage for the new
multi-channel adjoint branches (1D multi-channel and vec4f) added in
native/texture.h; add two tests that exercise these paths: (1) a 1D
multi-channel gradient test that creates a Texture1D with shape (W, C) or
similar multi-channel data, seeds pos via the existing sample_1d kernel and uses
a helper like _grad_1d (or a new _grad_1d_multi) to assert channel-wise gradient
behavior (e.g., one channel linear in x, others constant), and (2) a vec4f
regression that uses Texture2D/Texture1D with dtype=vec4f, a kernel like
sample_2d_vec2/sample_2d (adapted to vec4f) or a new sample_1d_vec4 to sample
dtype=wp.vec4f, seeds out.grad with a vec4f that selects channels, and asserts
the expected gradient values; reference sample_1d, sample_2d_vec2, _grad_1d and
_grad_2d helpers to locate where to add these tests and mirror existing patterns
(texture creation, normalized_coords/filter_mode/address_mode, wp.Tape,
wp.launch, out.grad, tape.backward, and numpy assertions).

ℹ️ Review info
⚙️ Run configuration

Configuration used: Path: .coderabbit.yml

Review profile: CHILL

Plan: Pro

Run ID: 8780433c-444b-4ea9-a6a6-993c9d36e94e

📥 Commits

Reviewing files that changed from the base of the PR and between 04e939a and 1d30e72.

📒 Files selected for processing (4)
  • warp/_src/builtins.py
  • warp/_src/texture.py
  • warp/native/texture.h
  • warp/tests/cuda/test_texture.py
🚧 Files skipped from review as they are similar to previous changes (2)
  • warp/_src/texture.py
  • warp/_src/builtins.py

Comment on lines +943 to +951
if (x0 >= 0 && x1 < tex.width) {
float u0 = tex.use_normalized_coords ? ((float)x0 + 0.5f) / (float)tex.width : (float)x0 + 0.5f;
float u1 = tex.use_normalized_coords ? ((float)x1 + 0.5f) / (float)tex.width : (float)x1 + 0.5f;
float gtx = 0.0f;
for (int c = 0; c < tex.num_channels; c++)
gtx += (texture_sample_helper<T>::fetch_1d(tex.tex, u1, c)
- texture_sample_helper<T>::fetch_1d(tex.tex, u0, c))
* ((const float*)&adj_ret)[c];
adj_u += gtx_mult * gtx;
Copy link
Copy Markdown

@coderabbitai coderabbitai bot Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Boundary-coordinate gradients are being zeroed even though the current CPU sampler still has slope there.

These x_ok / y_ok / z_ok guards drop adj_u / adj_v / adj_w as soon as the interpolation footprint crosses an edge. That disagrees with the CPU forward path above, which still blends any in-bounds texel with border zeros in that region. Example: in 1D texel space, sampling at u = 0.1 gives a weighted contribution from texel 0, so d/du is generally non-zero, but the new adjoint returns 0. Please derive the boundary term from the fetched corner values instead of requiring both neighbors along that axis to be in range; otherwise CPU AD is not differentiating the actual CPU forward function.

Also applies to: 967-975, 1024-1046, 1070-1091, 1154-1191, 1223-1260

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@warp/native/texture.h` around lines 943 - 951, The current guard zeroes adj_u
when either neighbor is out-of-bounds; instead compute the boundary contribution
by using the actual fetched corner values with out-of-range samples treated as
border-zero (so CPU forward's mix with zero is mirrored). Concretely, in the
block that computes gtx for adj_u (using x0, x1, tex.width,
tex.use_normalized_coords, gtx_mult, adj_ret and
texture_sample_helper<T>::fetch_1d), remove the early-reject condition and
compute per-channel values v0 and v1 where v0 = (x0 in range ? fetch_1d(..., u0,
c) : 0.0f) and v1 = (x1 in range ? fetch_1d(..., u1, c) : 0.0f); then form gtx
+= (v1 - v0) * ((const float*)&adj_ret)[c] and update adj_u += gtx_mult * gtx so
boundary derivatives reflect blended-in border zeros; apply the same change to
the analogous blocks for adj_v/adj_w referenced in the comment ranges.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was done to perfectly match pytorch's grid sample. Happy to implement either.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR description image shows three grayscale pelvic X-ray panels labeled “Fixed,” “Moving,” and “Overlay.” Each panel has axes from 1 to 200. The Overlay panel draws red contour-like curves over the fixed image, visualizing misalignment/registration features.

@eigenvivek Thanks — if the goal is to mirror PyTorch’s grid_sample, the gradients at the near-border half-texel should not be dropped. In PyTorch’s backward CPU kernel, grad w.r.t. grid is formed from corner differences with per-corner masks; out-of-bounds neighbors are treated as zeros, but the derivative along an axis is still accumulated from the in-bounds neighbor(s) rather than being zeroed for the whole axis. See the gx/gy update using masked gathers in ATen’s GridSamplerKernel.cpp, and the per-padding apply_get_grad logic that only zeros the multiplier when the coordinate is outside the valid range. This matches a “zeros” padding interpretation at the neighbor level, not an early return when one neighbor is OOB. (github.com)

Proposal

  • Adopt PyTorch-style boundary derivatives for LINEAR + BORDER (our BORDER returns 0 outside) by removing the x_ok/y_ok/z_ok gates and relying on fetch_* to yield 0 for OOB neighbors on both CPU and GPU.
  • This makes AD consistent with our CPU forward and PyTorch’s behavior for zeros padding; we still early-return for FILTER_CLOSEST and bail out for non-BORDER address modes.

Minimal diffs (illustrative)

  1. 1D (GPU branch)
// before
if (x0 >= 0 && x1 < tex.width) {
    float u0 = ...; float u1 = ...;
    float gtx = 0.0f;
    for (int c = 0; c < tex.num_channels; ++c)
        gtx += (fetch_1d(tex.tex, u1, c) - fetch_1d(tex.tex, u0, c)) * ((const float*)&adj_ret)[c];
    adj_u += gtx_mult * gtx;
}

// after
float u0 = tex.use_normalized_coords ? ((float)x0 + 0.5f) / (float)tex.width : (float)x0 + 0.5f;
float u1 = tex.use_normalized_coords ? ((float)x1 + 0.5f) / (float)tex.width : (float)x1 + 0.5f;
float gtx = 0.0f;
for (int c = 0; c < tex.num_channels; ++c) {
    float v0 = texture_sample_helper<T>::fetch_1d(tex.tex, u0, c); // 0 if OOB in BORDER
    float v1 = texture_sample_helper<T>::fetch_1d(tex.tex, u1, c);
    gtx += (v1 - v0) * ((const float*)&adj_ret)[c];
}
adj_u += gtx_mult * gtx;
  1. 2D (CPU branch)
// compute v00, v10, v01, v11 as today (fetch_* already returns 0 for OOB in BORDER)
float gtx = 0.0f, gty = 0.0f;
for (int c = 0; c < cpu_tex->num_channels; ++c) {
    float gOut = ((const float*)&adj_ret)[c];
    float v00 = cpu_fetch_texel_2d(cpu_tex, x0w, y0w, c);
    float v10 = cpu_fetch_texel_2d(cpu_tex, x1w, y0w, c);
    float v01 = cpu_fetch_texel_2d(cpu_tex, x0w, y1w, c);
    float v11 = cpu_fetch_texel_2d(cpu_tex, x1w, y1w, c);

    // remove x_ok / y_ok guards
    gtx += ((v10 - v00) * (1.0f - fy) + (v11 - v01) * fy) * gOut;
    gty += ((v01 - v00) * (1.0f - fx) + (v11 - v10) * fx) * gOut;
}
adj_u += gtx_mult * gtx;
adj_v += gty_mult * gty;
  1. 3D is analogous: drop z_ok/y_ok/x_ok and use fetched corner values (zeros when OOB) in the existing trilinear difference expressions.

Tests

  • Update boundary tests to assert the non-zero slope within the half-texel band adjacent to the domain (for LINEAR + BORDER), matching PyTorch’s behavior. PyTorch’s kernel accumulates corner-difference terms with masked OOB neighbors and only zeros the multiplier when the coordinate is outside the valid range for clamp-style modes. (github.com)
  • Keep CLOSEST filter tests expecting zero grads.

If you’d prefer to emulate PyTorch’s padding_mode="border" (clamp-to-edge) specifically, we can follow its apply_get_grad gating, which zeros d(grid) when the clamping is active; but that corresponds to clamp semantics, not our current BORDER=zero-padding. PyTorch API reference for grid_sample here for terminology. (docs.pytorch.org)

Let me know which you want to lock in:

  • Option A (recommended): PyTorch-style zeros-padding boundary gradients (remove gates).
  • Option B: Keep zero-at-boundary behavior and document the intentional divergence from PyTorch zeros-padding; rename tests accordingly.

Happy to post a patch once you confirm the option.


✏️ Learnings added
Learnt from: eigenvivek
Repo: NVIDIA/warp PR: 1305
File: warp/native/texture.h:943-951
Timestamp: 2026-03-19T11:54:43.181Z
Learning: Texture adjoints should match PyTorch grid_sample semantics; near-border derivatives under linear + zero-padding must accumulate from in-bounds neighbors instead of being dropped wholesale.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR description image shows three grayscale pelvic X‑ray panels side-by-side labeled “Fixed,” “Moving,” and “Overlay,” each with axes from 1 to 200. The Overlay panel draws red contour-like curves over the fixed image, visualizing registration features and misalignment.

@eigenvivek great — proceeding with Option A (PyTorch-style zeros-padding boundary gradients). Below are concrete diffs to remove the near-boundary x_ok/y_ok/z_ok gates and accumulate derivatives from fetched corner values where OOB neighbors naturally contribute 0. This keeps FILTER_CLOSEST = zero-grad behavior and still bails out for non-BORDER address modes.

  • Implement PyTorch-style boundary derivatives:
    • 1D/2D/3D, CUDA and CPU branches: remove x_ok/y_ok/z_ok guards; compute gtx/gty/gtz using fetched corner values with OOB → 0 under BORDER.
    • Keep early-return for FILTER_CLOSEST.
    • Keep error-and-return for non-BORDER modes.
  • Update tests that assumed “near-boundary == zero” to assert non-zero within the half-texel band.

If you’re good with this, I can run a quick grep to patch all guarded sites and push an updated test set.

Would you also like me to:

  • Amend the adjoint comment header to explicitly say “OOB neighbors are treated as zeros; near-boundary derivatives are generally non-zero under LINEAR+BORDER”?
  • Add a short doc note in warp/_src/texture.py mirroring this?

✏️ Learnings added
Learnt from: eigenvivek
Repo: NVIDIA/warp PR: 1305
File: warp/native/texture.h:943-951
Timestamp: 2026-03-19T14:07:01.229Z
Learning: Warp texture adjoints should mirror PyTorch zeros-padding semantics under LINEAR+BORDER: drop axis guards and accumulate boundary derivatives using fetched corner values where OOB neighbors contribute 0.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.


Failed to handle agent chat message. Please try again.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, implement these changes.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

The PR description image shows three grayscale pelvic X-ray panels labeled “Fixed,” “Moving,” and “Overlay,” each plotted on axes from 1 to 200. The “Overlay” panel draws many red contour-like lines over the fixed image, visualizing misalignment/registration features.

@coderabbit-user Acknowledged. I’ll proceed with Option A and include the doc note updates. Below are concrete diffs you can apply.

Plan to implement PyTorch-style boundary derivatives and docs/tests updates

  • Drop axis guards x_ok/y_ok/z_ok in CUDA and CPU adjoints; accumulate using fetched corner values where OOB contributes 0 under BORDER.
  • Keep early-return for FILTER_CLOSEST and bail out with error for non-BORDER modes.
  • Update/rename boundary tests to expect non-zero gradients near edges; retain far-boundary zero and CLOSEST zero tests.
  • Clarify comments in warp/native/texture.h and docstring in warp/_src/texture.py to state that near-boundary derivatives are non-zero under LINEAR+BORDER because OOB neighbors are zero-padded.

If you’d like, I can refine the boundary tests to check specific analytic slopes for the linear ramps (e.g., 1/width, 1/height, 1/depth factors) at those coordinates—say the word and I’ll include precise assertions.

Once you confirm, I’ll proceed to apply these patches across all referenced ranges and update any affected test registrations.


🏁 Script executed:

#!/bin/bash
set -e
echo "Files with potential boundary guards:"
rg -n --color never -C2 -e '\bx_ok\b|\by_ok\b|\bz_ok\b' warp/native/texture.h || true

echo
echo "Locate adj_texture_sample definitions:"
rg -n --color never -C2 'adj_texture_sample\(' warp/native/texture.h

echo
echo "List boundary-oriented tests:"
rg -n --color never -e 'near_boundary|boundary_zero|normalized_boundary' warp/tests/cuda/test_texture.py || true

echo
echo "Confirm docs note in Python texture module:"
rg -n --color never -C2 -e 'AD|adjoint|gradient|BORDER' warp/_src/texture.py || true

Length of output: 7533


The PR description image shows three grayscale pelvic X-ray panels labeled “Fixed,” “Moving,” and “Overlay,” each with axes from 1 to 200. The right “Overlay” panel draws many thin red contour lines over the fixed image, visualizing misalignment/registration features.

@coderabbit-user Thanks for the go-ahead. I ran a quick scan and confirmed:

  • 2D/3D adjoints gate derivatives via x_ok/y_ok/z_ok at Lines 1024–1043, 1070–1088, 1154–1186, and 1223–1255 in warp/native/texture.h.
  • 1D uses range-check gating (not named x_ok) in both CUDA and CPU branches.
  • Boundary tests and their registrations currently expect zero near the boundary.
  • Python docs already warn that AD is correct only for BORDER; we’ll add a note that near-boundary derivatives are non-zero under LINEAR+BORDER.

Below are concrete patches to:

  • Drop the axis guards and 1D range checks.

  • Accumulate boundary derivatives using fetched corner values where OOB contributes 0 under BORDER.

  • Update tests and registrations.

  • Clarify comments in C++ and Python.

  • Patch adjoints in warp/native/texture.h (1D/2D/3D, CUDA and CPU) to remove gating and use corner diffs with OOB→0 under BORDER.

  • Keep FILTER_CLOSEST early-return and non-BORDER error-and-return.

  • Update tests in warp/tests/cuda/test_texture.py to expect non-zero near edges; keep far-boundary/CLOSEST behavior.

  • Clarify docs: C++ header comment and Python note in warp/_src/texture.py.

If you’d like me to lock tests to exact analytic slopes for the ramp fixtures (e.g., 1/width, 1/height, 1/depth at those points), I can add precise assertions as a follow-up.


✏️ Learnings added
Learnt from: eigenvivek
Repo: NVIDIA/warp PR: 1305
File: warp/native/texture.h:943-951
Timestamp: 2026-03-20T02:16:35.717Z
Learning: For warp/native/texture.h adjoints, we will mirror PyTorch zeros-padding semantics: under LINEAR+BORDER, do not gate boundary derivatives by neighbor in-bounds checks; use corner differences with OOB neighbors contributing 0. Update tests accordingly to expect non-zero near-boundary grads.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.


Failed to handle agent chat message. Please try again.

if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST)
return;

if (tex.address_mode_u != WP_TEXTURE_ADDRESS_BORDER || tex.address_mode_v != WP_TEXTURE_ADDRESS_BORDER) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Generally, we don't do any error checking in kernels since the print messages waste registers. If it really can't be checked at the Python scope instead, we can issue printf's when the kernel is run in a debug mode.

Comment on lines +2907 to +2910
@wp.kernel
def sample_2d_vec2(tex: wp.Texture2D, pos: wp.array(dtype=wp.vec2f), out: wp.array(dtype=wp.vec2f)):
tid = wp.tid()
out[tid] = wp.texture_sample(tex, pos[tid], dtype=wp.vec2f)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An minor optimization is to move the @wp.kernel definitions out of the test function scope and into the module scope to prevent the module's hash from changing multiple times during test execution and resulting in multiple unnecessary recompilations.

@shi-eric
Copy link
Copy Markdown
Contributor

Please make sure there is a CHANGELOG.md entry that reflects the change

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[REQ] Add adjoints for texture_sample

3 participants