Fix Remat error when called with a model (#21094)

divyashreepathihalli · laxmareddyp · commit 9067d8060992 · 2025-03-31T15:29:37.000-07:00
* add print

* fix remat issue

* simplify code

* enable traceback filtering and update the function sig

* add a wrapper for activations

* change to except

* add layer call decorator

* fix remat call
diff --git a/keras/src/layers/layer.py b/keras/src/layers/layer.py
@@ -17,6 +17,7 @@
 """
 
 import collections
+import functools
 import inspect
 import math
 import warnings
@@ -1043,11 +1044,13 @@ def stateless_call(
                 if self._remat_mode is not None:
                     outputs = self.rematerialized_call(
                         self.quantized_call, *args, **kwargs
-                    )
+                    )(*args, **kwargs)
                 else:
                     outputs = self.quantized_call(*args, **kwargs)
             elif self._remat_mode is not None:
-                outputs = self.rematerialized_call(self.call, *args, **kwargs)
+                outputs = self.rematerialized_call(self.call, *args, **kwargs)(
+                    *args, **kwargs
+                )
             else:
                 outputs = self.call(*args, **kwargs)
             if return_losses:
@@ -1601,13 +1604,13 @@ def compute_size(x):
 
         # Full rematerialization
         if self._remat_mode.mode == "full":
-            return remat.remat(layer_call)(*args, **kwargs)
+            return remat.remat(layer_call)
 
         # Apply rematerialization to specific layers
         elif self._remat_mode.mode == "list_of_layers" and (
             self.name in self._remat_mode.layer_names
         ):
-            return remat.remat(layer_call)(*args, **kwargs)
+            return remat.remat(layer_call)
 
         # Apply rematerialization based on output size threshold
         elif self._remat_mode.mode == "larger_than":
@@ -1619,20 +1622,24 @@ def compute_size(x):
                 output_size
                 and output_size > self._remat_mode.output_size_threshold
             ):
-                return remat.remat(layer_call)(*args, **kwargs)
+                return remat.remat(layer_call)
         elif self._remat_mode.mode == "activations":
             has_activation = (
                 hasattr(self, "activation") and self.activation is not None
             )
             if has_activation:
-                not_rematted_activation = self.activation
-                try:
-                    self.activation = remat.remat(not_rematted_activation)
-                    return layer_call(*args, **kwargs)
-                finally:
-                    self.activation = not_rematted_activation
 
-        return layer_call(*args, **kwargs)
+                @functools.wraps(layer_call)
+                def rematerialized_activation_call_wrapper(*args, **kwargs):
+                    original_activation = self.activation
+                    self.activation = remat.remat(original_activation)
+                    try:
+                        return layer_call(*args, **kwargs)
+                    finally:
+                        self.activation = original_activation
+
+                return rematerialized_activation_call_wrapper
+        return layer_call
 
 
 def is_backend_tensor_or_symbolic(x, allow_none=False):
diff --git a/keras/src/layers/layer_test.py b/keras/src/layers/layer_test.py
@@ -17,6 +17,7 @@
 from keras.src.backend.common import remat
 from keras.src.backend.common.remat import RematScope
 from keras.src.models import Model
+from keras.src.utils import traceback_utils
 
 
 class LayerTest(testing.TestCase):
@@ -219,9 +220,11 @@ def test_functional_model_with_remat(self):
             self.skipTest(
                 "remat is not supported in openvino and numpy backends."
             )
-        with patch(
-            "keras.src.backend.common.remat.remat", wraps=remat.remat
-        ) as mock_remat:
+        traceback_utils.enable_traceback_filtering()
+        mock_remat = MockRemat()
+        with mock.patch(
+            "keras.src.backend.common.remat.remat", wraps=mock_remat
+        ):
             # Define model inputs
             inputs = Input(shape=(32, 32, 3))
 
diff --git a/keras/src/ops/operation.py b/keras/src/ops/operation.py
@@ -37,9 +37,15 @@ def __call__(self, *args, **kwargs):
             else:
                 if getattr(self, "_remat_mode", None) is not None:
                     if getattr(self, "quantization_mode", None) is not None:
-                        call_fn = self.rematerialized_call(self.quantized_call)
+                        call_fn = self.rematerialized_call(
+                            self.quantized_call,
+                            *args,
+                            **kwargs,
+                        )
                     else:
-                        call_fn = self.rematerialized_call(self.call)
+                        call_fn = self.rematerialized_call(
+                            self.call, *args, **kwargs
+                        )
                 else:
                     if getattr(self, "quantization_mode", None) is not None:
                         call_fn = self.quantized_call
@@ -58,9 +64,11 @@ def __call__(self, *args, **kwargs):
             if getattr(self, "quantization_mode", None) is not None:
                 return self.rematerialized_call(
                     self.quantized_call, *args, **kwargs
-                )
+                )(*args, **kwargs)
             else:
-                return self.rematerialized_call(self.call, *args, **kwargs)
+                return self.rematerialized_call(self.call, *args, **kwargs)(
+                    *args, **kwargs
+                )
         else:
             if getattr(self, "quantization_mode", None) is not None:
                 return self.quantized_call(*args, **kwargs)