add flattening utilities to deal with tracing outputs

ppwwyyxx · facebook-github-bot · commit 31eed14e86e2 · 2021-02-20T05:03:35.000-08:00
Reviewed By: theschnitz

Differential Revision: D26282744

fbshipit-source-id: 1380fde412270dba9b166aab32092801e9e3906a
diff --git a/detectron2/export/flatten.py b/detectron2/export/flatten.py
@@ -0,0 +1,173 @@
+import collections
+from dataclasses import dataclass
+from typing import List
+import torch
+
+from detectron2.structures import Boxes, Instances
+
+
+@dataclass
+class Schema:
+    """
+    A Schema defines how to flatten a possibly hierarchical object into tuple of
+    primitive objects, so it can be used as inputs/outputs of PyTorch's tracing.
+
+    PyTorch does not support tracing a function that produces rich output
+    structures (e.g. dict, Instances, Boxes). To trace such a function, we
+    flatten the rich object into tuple of tensors, and return this tuple of tensors
+    instead. Meanwhile, we also need to know how to "rebuild" the original object
+    from the flattened results, so we can evaluate the flattened results.
+    A Schema defines how to flatten an object, and while flattening it, it records
+    necessary schemas so that the object can be rebuilt using the flattened outputs.
+
+    The flattened object and the schema object are returned by the ``.flatten``
+    classmethod. Then the original object can be rebuilt with the ``__call__``
+    method of the schema.
+
+    A Schema is a dataclass that can be serialized easily.
+    """
+
+    # inspired by FetchMapper in tensorflow/python/client/session.py
+
+    @classmethod
+    def flatten(cls, obj):
+        """
+        Flatten ``obj``. Subclasses must override this.
+
+        Returns:
+            tuple: the flattened values
+            Schema: a schema that can rebuild ``obj`` from the flattened values
+        """
+        raise NotImplementedError
+
+    def __call__(self, values):
+        """
+        Rebuild the original object from the flattened ``values``.
+        Subclasses must override this.
+        """
+        raise NotImplementedError
+
+    @staticmethod
+    def _concat(values):
+        """
+        Concatenate an iterable of tuples into a single tuple.
+
+        Returns:
+            tuple: all elements of the input tuples, in order
+            list[list[int]]: for each input tuple, the ``[start, end]`` pair such
+                that ``result[start:end]`` is that tuple
+        """
+        # Accumulate in a list and convert once at the end: ``ret = ret + v`` in a
+        # loop copies the whole accumulated tuple on every iteration.
+        flat = []
+        idx_mapping = []
+        for v in values:
+            assert isinstance(v, tuple), "Flattened results must be a tuple"
+            start = len(flat)
+            flat.extend(v)
+            idx_mapping.append([start, len(flat)])
+        return tuple(flat), idx_mapping
+
+    @staticmethod
+    def _split(values, idx_mapping):
+        """
+        Inverse of ``_concat``: cut ``values`` into the slices described by
+        ``idx_mapping``.
+
+        Returns:
+            list: ``values[start:end]`` for every ``[start, end]`` in ``idx_mapping``
+        """
+        if idx_mapping:
+            # the last range ends where the concatenated sequence ends
+            expected_len = idx_mapping[-1][-1]
+            assert (
+                len(values) == expected_len
+            ), f"Values has length {len(values)} but expect length {expected_len}."
+        return [values[start:end] for start, end in idx_mapping]
+
+
+@dataclass
+class ListSchema(Schema):
+    """
+    Schema of a list or tuple: each element is flattened with its own schema and
+    the flattened results are concatenated.
+    """
+
+    schemas: List[Schema]  # the schema used to rebuild each element
+    idx_mapping: List[List[int]]  # [start, end] range of each element in the flattened values
+    is_tuple: bool  # rebuild a tuple if True, otherwise a list
+
+    def __call__(self, values):
+        """Rebuild the list/tuple by applying each element's schema to its slice."""
+        chunks = self._split(values, self.idx_mapping)
+        if len(chunks) != len(self.schemas):
+            raise ValueError(
+                f"Values has length {len(chunks)} but schemas " f"has length {len(self.schemas)}!"
+            )
+        rebuilt = [schema(chunk) for schema, chunk in zip(self.schemas, chunks)]
+        return tuple(rebuilt) if self.is_tuple else rebuilt
+
+    @classmethod
+    def flatten(cls, obj):
+        """Flatten every element of ``obj`` and concatenate the results in order."""
+        flat_parts = []
+        element_schemas = []
+        for element in obj:
+            part, element_schema = flatten_to_tuple(element)
+            flat_parts.append(part)
+            element_schemas.append(element_schema)
+        values, idx_mapping = cls._concat(flat_parts)
+        return values, cls(element_schemas, idx_mapping, isinstance(obj, tuple))
+
+
+@dataclass
+class IdentitySchema(Schema):
+    """Schema of an object that is kept as-is: it flattens to a 1-tuple of itself."""
+
+    def __call__(self, values):
+        """Return the first element of ``values``."""
+        first = values[0]
+        return first
+
+    @classmethod
+    def flatten(cls, obj):
+        """Wrap ``obj`` in a 1-tuple, paired with a new schema instance."""
+        schema = cls()
+        return (obj,), schema
+
+
+@dataclass
+class DictSchema(Schema):
+    """Schema of a dict with str keys; its values are flattened in sorted-key order."""
+
+    keys: List[str]  # the keys to rebuild with (sorted, when produced by ``flatten``)
+    value_schema: ListSchema  # schema of the list of values, ordered like ``keys``
+
+    def __call__(self, values):
+        """Rebuild the dict by pairing ``keys`` with the rebuilt values."""
+        rebuilt_values = self.value_schema(values)
+        return {key: value for key, value in zip(self.keys, rebuilt_values)}
+
+    @classmethod
+    def flatten(cls, obj):
+        """
+        Flatten the values of ``obj``, ordered by sorted key.
+
+        Raises:
+            KeyError: if any key of ``obj`` is not a str.
+        """
+        if not all(isinstance(k, str) for k in obj.keys()):
+            raise KeyError("Only support flattening dictionaries if keys are str.")
+        keys = sorted(obj.keys())
+        ret, schema = ListSchema.flatten([obj[k] for k in keys])
+        return ret, cls(keys, schema)
+
+
+@dataclass
+class InstancesSchema(Schema):
+    """
+    Schema of an ``Instances``: its fields are flattened in sorted-name order,
+    followed by ``image_size`` as the last flattened value.
+    """
+
+    field_names: List[str]  # field names (sorted, when produced by ``flatten``)
+    field_schema: ListSchema  # schema of the field values, in ``field_names`` order
+
+    def __call__(self, values):
+        """Rebuild the ``Instances``; the last flattened value is its ``image_size``."""
+        image_size = values[-1]
+        field_values = self.field_schema(values[:-1])
+        named_fields = dict(zip(self.field_names, field_values))
+        return Instances(image_size, **named_fields)
+
+    @classmethod
+    def flatten(cls, obj):
+        """
+        Flatten the fields of ``obj`` and append its ``image_size``, converted to
+        a tensor if it is not one already.
+        """
+        field_names = sorted(obj.get_fields().keys())
+        flat_fields, schema = ListSchema.flatten([obj.get(name) for name in field_names])
+        image_size = obj.image_size
+        # tracing outputs must be tensors, so a non-tensor size is converted
+        size = image_size if isinstance(image_size, torch.Tensor) else torch.tensor(image_size)
+        return flat_fields + (size,), cls(field_names, schema)
+
+
+@dataclass
+class BoxesSchema(Schema):
+    """Schema of a ``Boxes``: it flattens to a 1-tuple holding its ``tensor``."""
+
+    def __call__(self, values):
+        """Rebuild the ``Boxes`` from the first element of ``values``."""
+        box_tensor = values[0]
+        return Boxes(box_tensor)
+
+    @classmethod
+    def flatten(cls, obj):
+        """Return ``obj.tensor`` in a 1-tuple, paired with a new schema instance."""
+        flattened = (obj.tensor,)
+        return flattened, cls()
+
+
+# If more custom structures are needed in the future, we can allow
+# passing in extra schemas for custom types.
+def flatten_to_tuple(obj):
+    """
+    Flatten an object so it can be used for PyTorch tracing.
+    Also returns how to rebuild the original object from the flattened outputs.
+
+    The schema is chosen by the first matching type among: str/bytes (kept as-is),
+    sequences, mappings, ``Instances`` and ``Boxes``. Any other object is kept as-is.
+
+    Returns:
+        res (tuple): the flattened results that can be used as tracing outputs
+        schema: an object with a ``__call__`` method such that ``schema(res)``
+             rebuilds ``obj``. It is a pure dataclass that can be serialized.
+    """
+    # Order matters: str and bytes are sequences too, so they must be matched
+    # before the generic Sequence entry.
+    type_to_schema = (
+        ((str, bytes), IdentitySchema),
+        (collections.abc.Sequence, ListSchema),
+        (collections.abc.Mapping, DictSchema),
+        (Instances, InstancesSchema),
+        (Boxes, BoxesSchema),
+    )
+    # lazily yields matching schemas, so only the first match is ever used
+    matches = (schema for klass, schema in type_to_schema if isinstance(obj, klass))
+    return next(matches, IdentitySchema).flatten(obj)
diff --git a/detectron2/utils/testing.py b/detectron2/utils/testing.py
@@ -62,18 +62,25 @@ def get_sample_coco_image(tensor=True):
     return ret
 
 
-def assert_instances_allclose(input, other, rtol=1e-5, msg=""):
+def assert_instances_allclose(input, other, *, rtol=1e-5, msg="", size_as_tensor=False):
     """
     Args:
         input, other (Instances):
+        size_as_tensor: compare image_size of the Instances as tensors (instead of tuples).
+             Useful for comparing outputs of tracing.
     """
     if not msg:
         msg = "Two Instances are different! "
     else:
         msg = msg.rstrip() + " "
-    assert input.image_size == other.image_size, (
-        msg + f"image_size is {input.image_size} vs. {other.image_size}!"
-    )
+
+    size_error_msg = msg + f"image_size is {input.image_size} vs. {other.image_size}!"
+    if size_as_tensor:
+        assert torch.equal(
+            torch.tensor(input.image_size), torch.tensor(other.image_size)
+        ), size_error_msg
+    else:
+        assert input.image_size == other.image_size, size_error_msg
     fields = sorted(input.get_fields().keys())
     fields_other = sorted(other.get_fields().keys())
     assert fields == fields_other, msg + f"Fields are {fields} vs {fields_other}!"
diff --git a/tests/test_export_torchscript.py b/tests/test_export_torchscript.py
@@ -8,6 +8,7 @@
 
 from detectron2 import model_zoo
 from detectron2.config import get_cfg
+from detectron2.export.flatten import flatten_to_tuple
 from detectron2.export.torchscript import dump_torchscript_IR, export_torchscript_with_instances
 from detectron2.export.torchscript_patch import patch_builtin_len
 from detectron2.layers import ShapeSpec
@@ -83,77 +84,45 @@ def _test_retinanet_model(self, config_path):
 class TestTracing(unittest.TestCase):
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
     def testMaskRCNN(self):
-        class WrapModel(nn.ModuleList):
-            def forward(self, image):
-                inputs = [{"image": image}]
-                outputs = self[0].inference(inputs, do_postprocess=False)[0]
-                size = outputs.image_size
-                if torch.jit.is_tracing():
-                    assert isinstance(size, torch.Tensor)
-                else:
-                    size = torch.as_tensor(size)
-                return (
-                    size,
-                    outputs.pred_classes,
-                    outputs.pred_boxes.tensor,
-                    outputs.scores,
-                    outputs.pred_masks,
-                )
-
-            @staticmethod
-            def convert_output(output):
-                r = Instances(tuple(output[0]))
-                r.pred_classes = output[1]
-                r.pred_boxes = Boxes(output[2])
-                r.scores = output[3]
-                r.pred_masks = output[4]
-                return r
+        def inference_func(model, image):
+            # run inference with do_postprocess=False and return the first element of the results
+            inputs = [{"image": image}]
+            outputs = model.inference(inputs, do_postprocess=False)[0]
+            return outputs
 
-        self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", WrapModel)
+        self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", inference_func)
 
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
     def testRetinaNet(self):
-        class WrapModel(nn.ModuleList):
-            def forward(self, image):
-                inputs = [{"image": image}]
-                outputs = self[0].forward(inputs)[0]["instances"]
-                size = outputs.image_size
-                if torch.jit.is_tracing():
-                    assert isinstance(size, torch.Tensor)
-                else:
-                    size = torch.as_tensor(size)
-                return (
-                    size,
-                    outputs.pred_classes,
-                    outputs.pred_boxes.tensor,
-                    outputs.scores,
-                )
-
-            @staticmethod
-            def convert_output(output):
-                r = Instances(tuple(output[0]))
-                r.pred_classes = output[1]
-                r.pred_boxes = Boxes(output[2])
-                r.scores = output[3]
-                return r
+        def inference_func(model, image):
+            # return the "instances" entry of the first element of the model's outputs
+            return model.forward([{"image": image}])[0]["instances"]
 
-        self._test_model("COCO-Detection/retinanet_R_50_FPN_3x.yaml", WrapModel)
+        self._test_model("COCO-Detection/retinanet_R_50_FPN_3x.yaml", inference_func)
 
-    def _test_model(self, config_path, WrapperCls):
-        # TODO wrapper should be handled by export API in the future
+    def _test_model(self, config_path, inference_func):
+        # Trace a wrapper around `inference_func(model, image)` that returns flattened outputs,
+        # then check that the rebuilt traced outputs match the outputs of the untraced model.
         model = model_zoo.get(config_path, trained=True)
         image = get_sample_coco_image()
 
-        model = WrapperCls([model])
-        model.eval()
+        class Wrapper(nn.ModuleList):  # a wrapper to make the model traceable
+            def forward(self, image):
+                outputs = inference_func(self[0], image)
+                flattened_outputs, schema = flatten_to_tuple(outputs)
+                if not hasattr(self, "schema"):
+                    # keep the first schema seen, so that `rebuild` can use it later
+                    self.schema = schema
+                return flattened_outputs
+
+            def rebuild(self, flattened_outputs):
+                # turn the flattened outputs back into the structure recorded by the schema
+                return self.schema(flattened_outputs)
+
+        wrapper = Wrapper([model])
+        wrapper.eval()
         with torch.no_grad(), patch_builtin_len():
             small_image = nn.functional.interpolate(image, scale_factor=0.5)
             # trace with a different image, and the trace must still work
-            traced_model = torch.jit.trace(model, (small_image,))
+            traced_model = torch.jit.trace(wrapper, (small_image,))
 
-            output = WrapperCls.convert_output(model(image))
-            traced_output = WrapperCls.convert_output(traced_model(image))
-        assert_instances_allclose(output, traced_output)
+            output = inference_func(model, image)
+            traced_output = wrapper.rebuild(traced_model(image))
+        assert_instances_allclose(output, traced_output, size_as_tensor=True)
 
     def testKeypointHead(self):
         class M(nn.Module):
@@ -214,3 +183,22 @@ def forward(self, x):
             for name in ["model_ts_code", "model_ts_IR", "model_ts_IR_inlined", "model"]:
                 fname = os.path.join(d, name + ".txt")
                 self.assertTrue(os.stat(fname).st_size > 0, fname)
+
+    def test_flatten_basic(self):
+        # a nested list/tuple/dict of ints: flatten it to a plain tuple, then rebuild it
+        nested = [3, ([5, 6], {"name": [7, 9], "name2": 3})]
+        flattened, schema = flatten_to_tuple(nested)
+        self.assertEqual(flattened, (3, 5, 6, 7, 9, 3))
+        self.assertEqual(schema(flattened), nested)
+
+    def test_flatten_instances_boxes(self):
+        boxes = Boxes(torch.ones((1, 4)))
+        inst = Instances(torch.tensor([5, 8]), pred_masks=torch.tensor([3]), pred_boxes=boxes)
+        obj = [3, ([5, 6], inst)]
+        res, schema = flatten_to_tuple(obj)
+        self.assertEqual(res[:3], (3, 5, 6))
+        # Fields of an Instances are flattened in sorted-name order, followed by image_size.
+        expected_tensors = (inst.pred_boxes.tensor, inst.pred_masks, inst.image_size)
+        # zip() stops at the shorter input, so check the length explicitly to catch
+        # missing or extra flattened outputs.
+        self.assertEqual(len(res), 3 + len(expected_tensors))
+        for r, expected in zip(res[3:], expected_tensors):
+            # flattening must return the very same tensor objects, not copies
+            self.assertIs(r, expected)
+        new_obj = schema(res)
+        assert_instances_allclose(new_obj[1][1], inst, rtol=0.0, size_as_tensor=True)