From 6eed94fbcbf64a8c9c7ab3d797d9146c56009e5d Mon Sep 17 00:00:00 2001
From: daikw
Date: Tue, 4 Nov 2025 17:13:50 +0900
Subject: [PATCH 1/6] fix(dataset): reload episodes metadata before batch video encoding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When using --resume with batch encoding enabled, the in-memory
self.meta.episodes dataset was not being updated with newly recorded
episodes. This caused an IndexError when trying to access episode
metadata during batch encoding.

The issue occurred because:
1. New episodes were saved to parquet files
2. self.meta.total_episodes was updated
3. But self.meta.episodes (HF Dataset) remained stale
4. Batch encoding tried to access episodes beyond the original size

This fix reloads the episodes metadata at the start of
_batch_save_episode_video() to ensure all episode data is available.

Fixes IndexError: Invalid key: X is out of bounds for size Y

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 src/lerobot/datasets/lerobot_dataset.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py
index c8bc5049ec..811065237c 100644
--- a/src/lerobot/datasets/lerobot_dataset.py
+++ b/src/lerobot/datasets/lerobot_dataset.py
@@ -1181,6 +1181,10 @@ def _batch_save_episode_video(self, start_episode: int, end_episode: int | None
             f"Batch encoding {self.batch_encoding_size} videos for episodes {start_episode} to {end_episode - 1}"
         )
 
+        # Reload episodes to ensure we have the latest metadata for all episodes,
+        # especially when resuming recording with batch encoding enabled
+        self.meta.episodes = load_episodes(self.root)
+
         chunk_idx = self.meta.episodes[start_episode]["data/chunk_index"]
         file_idx = self.meta.episodes[start_episode]["data/file_index"]
         episode_df_path = self.root / DEFAULT_EPISODES_PATH.format(chunk_index=chunk_idx, file_index=file_idx)
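The IndexError above is the standard out-of-bounds error raised by a Hugging Face datasets.Dataset. A minimal sketch of the stale-cache failure mode, outside LeRobot (the names below only stand in for self.meta.episodes and self.meta.total_episodes):

```python
# Minimal sketch, assuming episodes metadata is held in a Hugging Face
# datasets.Dataset that was loaded once at resume time and never refreshed.
from datasets import Dataset

episodes = Dataset.from_dict({"episode_index": [0, 1, 2]})  # loaded from disk on resume
total_episodes = 5  # counter updated after two more episodes were recorded

for ep_idx in range(total_episodes):
    row = episodes[ep_idx]  # IndexError: Invalid key: 3 is out of bounds for size 3
```

Reloading the dataset from the freshly written parquet files before indexing, as the patch does, removes the mismatch between the counter and the cached table.
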
From 917bfbfd6b8b6c37a3a934acb470c1844962415e Mon Sep 17 00:00:00 2001
From: daikw
Date: Tue, 4 Nov 2025 17:32:20 +0900
Subject: [PATCH 2/6] fix(dataset): flush metadata buffer before reloading episodes in batch encoding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous fix to reload episodes before batch encoding was incomplete.
When batch encoding is triggered, the metadata buffer may not have been
flushed to disk yet, causing load_episodes() to fail with a NoneType error.

This fix ensures that:
1. Metadata buffer is flushed to disk before attempting to reload
2. Episodes are reloaded to get the latest metadata
3. Batch encoding can proceed with complete episode information

Fixes: TypeError: 'NoneType' object is not subscriptable

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 src/lerobot/datasets/lerobot_dataset.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py
index 811065237c..db9c179cbb 100644
--- a/src/lerobot/datasets/lerobot_dataset.py
+++ b/src/lerobot/datasets/lerobot_dataset.py
@@ -1181,6 +1181,9 @@ def _batch_save_episode_video(self, start_episode: int, end_episode: int | None
             f"Batch encoding {self.batch_encoding_size} videos for episodes {start_episode} to {end_episode - 1}"
         )
 
+        # Flush metadata buffer to ensure all episode metadata is written to disk
+        self.meta._flush_metadata_buffer()
+
         # Reload episodes to ensure we have the latest metadata for all episodes,
         # especially when resuming recording with batch encoding enabled
         self.meta.episodes = load_episodes(self.root)

From 2bac5eec8ab7a3b70772a3b4ca703cd9ffeff9c8 Mon Sep 17 00:00:00 2001
From: daikw
Date: Tue, 4 Nov 2025 17:41:06 +0900
Subject: [PATCH 3/6] fix(dataset): close ParquetWriter before reloading episodes in batch encoding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ParquetWriter buffers data and only writes complete files when closed.
Simply flushing the buffer is not sufficient - the file remains incomplete
and cannot be read by PyArrow, resulting in:
"Parquet magic bytes not found in footer"

This fix:
1. Calls _close_writer() instead of _flush_metadata_buffer()
2. Ensures the ParquetWriter is properly closed and data is fully written
3. A new writer will be created on the next metadata write operation

Fixes: pyarrow.lib.ArrowInvalid: Parquet magic bytes not found in footer

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 src/lerobot/datasets/lerobot_dataset.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py
index db9c179cbb..0015c9ddd9 100644
--- a/src/lerobot/datasets/lerobot_dataset.py
+++ b/src/lerobot/datasets/lerobot_dataset.py
@@ -1181,8 +1181,9 @@ def _batch_save_episode_video(self, start_episode: int, end_episode: int | None
             f"Batch encoding {self.batch_encoding_size} videos for episodes {start_episode} to {end_episode - 1}"
         )
 
-        # Flush metadata buffer to ensure all episode metadata is written to disk
-        self.meta._flush_metadata_buffer()
+        # Close writer to ensure all episode metadata is flushed and written to disk completely
+        # This is necessary because ParquetWriter buffers data and only writes complete files on close
+        self.meta._close_writer()
 
         # Reload episodes to ensure we have the latest metadata for all episodes,
         # especially when resuming recording with batch encoding enabled
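The "magic bytes" error in this patch is ordinary pyarrow behaviour: ParquetWriter only writes the parquet footer when it is closed, so a file that is still open cannot be read back. A small standalone illustration (not LeRobot code), assuming the metadata writer wraps a plain pyarrow.parquet.ParquetWriter:

```python
import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({"episode_index": [0, 1, 2]})
writer = pq.ParquetWriter("episodes.parquet", table.schema)
writer.write_table(table)

# Reading here fails because the footer has not been written yet:
# pq.read_table("episodes.parquet")  # ArrowInvalid: Parquet magic bytes not found in footer

writer.close()
pq.read_table("episodes.parquet")  # succeeds once the writer is closed
```

This is why closing the writer, rather than merely flushing a buffer, is what makes the reload reliable.
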
From 3a26abdcaac84c26de9b3a2de52ac462084d6518 Mon Sep 17 00:00:00 2001
From: daikw
Date: Tue, 4 Nov 2025 18:46:11 +0900
Subject: [PATCH 4/6] fix(dataset): preserve images for batch encoding and clean up after
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When using batch encoding, temporary images must be preserved until batch
encoding completes. The previous code deleted images immediately after each
episode, causing FileNotFoundError when batch encoding tried to access them.

This fix:
1. Skips image deletion in save_episode() when using batch encoding
2. Deletes images after each episode's video is encoded in batch mode
3. Ensures images are available for batch encoding while cleaning up afterward

Fixes: FileNotFoundError: No images found in .../episode-000000

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 src/lerobot/datasets/lerobot_dataset.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py
index 0015c9ddd9..843b1f2d9c 100644
--- a/src/lerobot/datasets/lerobot_dataset.py
+++ b/src/lerobot/datasets/lerobot_dataset.py
@@ -1164,7 +1164,9 @@ def save_episode(self, episode_data: dict | None = None) -> None:
 
         if not episode_data:
             # Reset episode buffer and clean up temporary images (if not already deleted during video encoding)
-            self.clear_episode_buffer(delete_images=len(self.meta.image_keys) > 0)
+            # For batched encoding, keep images until batch encoding is complete
+            delete_images = len(self.meta.image_keys) > 0 and not (has_video_keys and use_batched_encoding)
+            self.clear_episode_buffer(delete_images=delete_images)
 
     def _batch_save_episode_video(self, start_episode: int, end_episode: int | None = None) -> None:
         """
@@ -1227,6 +1229,12 @@ def _batch_save_episode_video(self, start_episode: int, end_episode: int | None
             episode_df.to_parquet(episode_df_path)
             self.meta.episodes = load_episodes(self.root)
 
+            # Clean up temporary images after video encoding
+            for cam_key in self.meta.camera_keys:
+                img_dir = self._get_image_file_dir(ep_idx, cam_key)
+                if img_dir.is_dir():
+                    shutil.rmtree(img_dir)
+
     def _save_episode_data(self, episode_buffer: dict) -> dict:
         """Save episode data to a parquet file and update the Hugging Face dataset of frames data.
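The cleanup added inside the batch-encoding loop is a deferred-cleanup pattern: keep each episode's temporary frame directories on disk until that episode's video exists, then remove them. A hypothetical standalone helper showing the same pattern (the directory layout and helper name are illustrative, not the dataset's actual API):

```python
import shutil
from pathlib import Path

def cleanup_episode_images(images_root: Path, episode_index: int, camera_keys: list[str]) -> None:
    """Remove one episode's temporary frame directories after its video has been encoded."""
    for cam_key in camera_keys:
        # Assumed layout: <images_root>/<camera_key>/episode-000123/
        img_dir = images_root / cam_key / f"episode-{episode_index:06d}"
        if img_dir.is_dir():  # may already be gone, e.g. after a retried encode
            shutil.rmtree(img_dir)
```

The is_dir() guard matters because a retry or an earlier per-episode encode may already have removed the directory.
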
From 5361df5a6fd78db70e0d5f1ced96dc5ab2d48b53 Mon Sep 17 00:00:00 2001
From: daikw
Date: Tue, 4 Nov 2025 18:50:04 +0900
Subject: [PATCH 5/6] fix(dataset): check for video metadata before resume in batch encoding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The resume logic in _save_episode_video() was checking only for the
existence of episodes, not whether those episodes had video metadata.

In batch encoding scenarios:
1. Episodes 0-9 are recorded with metadata (no video metadata yet)
2. Batch encoding starts and reloads episodes
3. _save_episode_video(video_key, 0) is called
4. episode_index == 0, so it enters the first-episode branch
5. self.meta.episodes exists and has length > 0
6. Code tries to access videos/{video_key}/chunk_index
7. KeyError: this key doesn't exist yet (videos not encoded)

This fix adds a check to verify that video metadata actually exists before
treating it as a resume case. This prevents KeyError when batch encoding a
new dataset or episodes without prior video metadata.

Fixes: KeyError: 'videos/observation.images.overhead/chunk_index'

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 src/lerobot/datasets/lerobot_dataset.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py
index 843b1f2d9c..d680b82b7b 100644
--- a/src/lerobot/datasets/lerobot_dataset.py
+++ b/src/lerobot/datasets/lerobot_dataset.py
@@ -1342,8 +1342,12 @@ def _save_episode_video(self, video_key: str, episode_index: int) -> dict:
         ):
             # Initialize indices for a new dataset made of the first episode data
             chunk_idx, file_idx = 0, 0
-            if self.meta.episodes is not None and len(self.meta.episodes) > 0:
-                # It means we are resuming recording, so we need to load the latest episode
+            if (
+                self.meta.episodes is not None
+                and len(self.meta.episodes) > 0
+                and f"videos/{video_key}/chunk_index" in self.meta.episodes[-1]
+            ):
+                # It means we are resuming recording with existing video metadata
                 # Update the indices to avoid overwriting the latest episode
                 old_chunk_idx = self.meta.episodes[-1][f"videos/{video_key}/chunk_index"]
                 old_file_idx = self.meta.episodes[-1][f"videos/{video_key}/file_index"]
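The guard introduced here reduces to a membership test on the last episode's metadata row. Sketched as a standalone predicate (last_episode stands in for self.meta.episodes[-1]; the helper itself is only illustrative):

```python
def has_video_metadata(last_episode: dict | None, video_key: str) -> bool:
    """True only when the previous episode already carries encoded-video indices."""
    if not last_episode:
        return False  # brand-new dataset: nothing to resume from
    return f"videos/{video_key}/chunk_index" in last_episode
```

As the next patch shows, key presence alone is still not enough once the column exists in the parquet schema but only holds nulls.
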
From d46740327f38c4b981318146740a47bd41c78e67 Mon Sep 17 00:00:00 2001
From: daikw
Date: Tue, 4 Nov 2025 19:23:16 +0900
Subject: [PATCH 6/6] fix(dataset): defer image cleanup and add null check for batch encoding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two critical fixes for batch encoding reliability:

1. **Defer image cleanup to after successful batch encoding**

   Problem: Images were deleted inside the _batch_save_episode_video() loop,
   but if an exception occurred after encoding (e.g., during parquet save),
   the retry in VideoEncodingManager.__exit__ would fail with
   FileNotFoundError.

   Solution: Move image cleanup to save_episode() and
   VideoEncodingManager.__exit__, ensuring cleanup happens only after the
   entire batch encoding succeeds. This allows retries to access the images
   if needed.

2. **Add null check for video metadata values**

   Problem: Checking only for key existence wasn't sufficient - the key can
   exist in the parquet schema but have NULL values, causing:
   "TypeError: unsupported operand type(s) for +=: 'NoneType' and 'int'"

   Solution: Add explicit check that video metadata values are not None
   before treating as resume case.

Fixes:
- FileNotFoundError: No images found during batch encoding retry
- TypeError in update_chunk_file_indices with NoneType

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 src/lerobot/datasets/lerobot_dataset.py | 15 +++++++++------
 src/lerobot/datasets/video_utils.py     |  9 +++++++++
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py
index d680b82b7b..4fc5636da7 100644
--- a/src/lerobot/datasets/lerobot_dataset.py
+++ b/src/lerobot/datasets/lerobot_dataset.py
@@ -1160,6 +1160,14 @@ def save_episode(self, episode_data: dict | None = None) -> None:
                 start_ep = self.num_episodes - self.batch_encoding_size
                 end_ep = self.num_episodes
                 self._batch_save_episode_video(start_ep, end_ep)
+
+                # Clean up temporary images after successful batch encoding
+                for ep_idx in range(start_ep, end_ep):
+                    for cam_key in self.meta.camera_keys:
+                        img_dir = self._get_image_file_dir(ep_idx, cam_key)
+                        if img_dir.is_dir():
+                            shutil.rmtree(img_dir)
+
                 self.episodes_since_last_encoding = 0
 
         if not episode_data:
@@ -1229,12 +1237,6 @@ def _batch_save_episode_video(self, start_episode: int, end_episode: int | None
             episode_df.to_parquet(episode_df_path)
             self.meta.episodes = load_episodes(self.root)
 
-            # Clean up temporary images after video encoding
-            for cam_key in self.meta.camera_keys:
-                img_dir = self._get_image_file_dir(ep_idx, cam_key)
-                if img_dir.is_dir():
-                    shutil.rmtree(img_dir)
-
     def _save_episode_data(self, episode_buffer: dict) -> dict:
         """Save episode data to a parquet file and update the Hugging Face dataset of frames data.
@@ -1346,6 +1348,7 @@ def _save_episode_video(self, video_key: str, episode_index: int) -> dict:
                 self.meta.episodes is not None
                 and len(self.meta.episodes) > 0
                 and f"videos/{video_key}/chunk_index" in self.meta.episodes[-1]
+                and self.meta.episodes[-1][f"videos/{video_key}/chunk_index"] is not None
             ):
                 # It means we are resuming recording with existing video metadata
                 # Update the indices to avoid overwriting the latest episode
diff --git a/src/lerobot/datasets/video_utils.py b/src/lerobot/datasets/video_utils.py
index 0de791919a..fb19f82dee 100644
--- a/src/lerobot/datasets/video_utils.py
+++ b/src/lerobot/datasets/video_utils.py
@@ -643,6 +643,15 @@ def __exit__(self, exc_type, exc_val, exc_tb):
                 )
                 self.dataset._batch_save_episode_video(start_ep, end_ep)
 
+                # Clean up temporary images after successful batch encoding
+                for ep_idx in range(start_ep, end_ep):
+                    for cam_key in self.dataset.meta.camera_keys:
+                        img_dir = self.dataset._get_image_file_dir(ep_idx, cam_key)
+                        if img_dir.is_dir():
+                            shutil.rmtree(img_dir)
+
+                self.dataset.episodes_since_last_encoding = 0
+
         # Finalize the dataset to properly close all writers
         self.dataset.finalize()
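The NULL-value case that motivates the second fix is easy to reproduce with pyarrow alone: a column can be present in the schema while every value in it is null, and those nulls come back as None when rows are materialised. A small sketch (the column name is shortened here for readability):

```python
import pyarrow as pa

table = pa.table({
    "episode_index": [0, 1],
    # Column exists in the schema, but its values are NULL
    "videos/cam/chunk_index": pa.array([None, None], type=pa.int64()),
})

row = table.to_pylist()[-1]
print("videos/cam/chunk_index" in row)        # True: the key is in the schema
print(row["videos/cam/chunk_index"] is None)  # True: so `old_chunk_idx += 1` would raise TypeError
```

Hence the extra `is not None` check before the stored indices are treated as a resume point.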