From cda20d64e44d599b780aa945959a4d1271be26e3 Mon Sep 17 00:00:00 2001
From: Werner Robitza <werner.robitza@gmail.com>
Date: Sat, 18 Oct 2025 20:52:32 +0200
Subject: [PATCH] feat: add selective audio stream normalization

Add support for selective audio stream normalization, addressing issue #285.

New CLI arguments:

- `-as/--audio-streams`: Select specific audio streams by index (comma-separated)
- `--audio-default-only`: Only normalize audio streams with 'default' disposition
- `--keep-other-audio`: Keep non-selected streams as passthrough (unchanged)

Implementation details:

- Added `is_default` flag to AudioStream to track disposition
- Updated MediaFile.parse_streams() to parse "(default)" flag from FFmpeg output
- Added _get_streams_to_normalize() method to filter streams based on selection
- Updated first pass, filter generation, and second pass for selected + passthrough streams
- Proper output stream index tracking for correct codec assignment

Validation:

- Prevents using both `-as` and `--audio-default-only` together
- Prevents using both `--keep-other-audio` and `--keep-original-audio` together

Tests:

- Added 6 new test cases covering all scenarios
- All 46 tests passing (40 original + 6 new)

Documentation:

- Updated docs/usage/options.md with complete documentation for new options
---
 docs/usage/options.md                     |  26 ++++
 src/ffmpeg_normalize/__main__.py          |  53 ++++++++
 src/ffmpeg_normalize/_ffmpeg_normalize.py |  24 ++++
 src/ffmpeg_normalize/_media_file.py       | 147 +++++++++++++++++++---
 src/ffmpeg_normalize/_streams.py          |   3 +
 tests/test_all.py                         |  73 +++++++++++
 6 files changed, 306 insertions(+), 20 deletions(-)

diff --git a/docs/usage/options.md b/docs/usage/options.md
index 9b1d85c..de98a2f 100644
--- a/docs/usage/options.md
+++ b/docs/usage/options.md
@@ -187,6 +187,32 @@ Set the number of audio channels. If not specified, the input channel layout wil
 
 Copy original, non-normalized audio streams to output file
 
+## Audio Stream Selection
+
+### `-as AUDIO_STREAMS, --audio-streams AUDIO_STREAMS`
+
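+Select specific audio streams to normalize by their FFmpeg stream index in the input file, i.e. the `N` in `Stream #0:N`, which counts streams of all types and is not the position among audio streams only (comma-separated).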
+
+Example: `-as 1` normalizes only stream 1, `-as 1,2` normalizes streams 1 and 2.
+
+If not specified, all audio streams will be normalized (default behavior).
+
+### `--audio-default-only`
+
+Only normalize audio streams with the 'default' disposition flag.
+
+This is useful for files with multiple audio tracks where only the default track should be normalized (e.g., main audio track vs. commentary tracks).
+
+### `--keep-other-audio`
+
+Keep non-selected audio streams in the output file (copy without normalization).
+
+Only has an effect when combined with either `-as`/`--audio-streams` or `--audio-default-only`; without one of them the option is ignored.
+
+Example: `ffmpeg-normalize input.mkv -as 1 --keep-other-audio` will normalize stream 1 and copy all other audio streams unchanged.
+
+**Note:** This option is mutually exclusive with `--keep-original-audio`. Use `--keep-original-audio` to keep all original streams alongside normalized ones, or `--keep-other-audio` to keep only non-selected streams as passthrough.
+
 ### `-prf PRE_FILTER, --pre-filter PRE_FILTER`
 
 Add an audio filter chain before applying normalization.
diff --git a/src/ffmpeg_normalize/__main__.py b/src/ffmpeg_normalize/__main__.py
index 7a02c87..24d6977 100644
--- a/src/ffmpeg_normalize/__main__.py
+++ b/src/ffmpeg_normalize/__main__.py
@@ -315,6 +315,44 @@ def create_parser() -> argparse.ArgumentParser:
         ),
     )
 
+    group_stream_selection = parser.add_argument_group("Audio Stream Selection")
+    group_stream_selection.add_argument(
+        "-as",
+        "--audio-streams",
+        type=str,
+        help=textwrap.dedent(
+            """\
+        Select specific audio streams to normalize by stream index (comma-separated).
+        Example: --audio-streams 0,2 will normalize only streams 0 and 2.
+
+        By default, all audio streams are normalized.
+        """
+        ),
+    )
+    group_stream_selection.add_argument(
+        "--audio-default-only",
+        action="store_true",
+        help=textwrap.dedent(
+            """\
+        Only normalize audio streams with the 'default' disposition flag.
+        This is useful for files with multiple audio tracks where only the main track
+        should be normalized (e.g., keeping commentary tracks unchanged).
+        """
+        ),
+    )
+    group_stream_selection.add_argument(
+        "--keep-other-audio",
+        action="store_true",
+        help=textwrap.dedent(
+            """\
+        Keep non-selected audio streams in the output file (copy without normalization).
+        Only applies when --audio-streams or --audio-default-only is used.
+
+        By default, only selected streams are included in the output.
+        """
+        ),
+    )
+
     group_acodec = parser.add_argument_group("Audio Encoding")
     group_acodec.add_argument(
         "-c:a",
@@ -553,6 +591,18 @@ def _split_options(opts: str) -> list[str]:
     extra_input_options = _split_options(cli_args.extra_input_options)
     extra_output_options = _split_options(cli_args.extra_output_options)
 
+    # parse audio streams selection
+    audio_streams = None
+    if cli_args.audio_streams:
+        try:
+            audio_streams = [int(s.strip()) for s in cli_args.audio_streams.split(",")]
+        except ValueError:
+            error("Invalid audio stream indices. Must be comma-separated integers.")
+
+    # validate stream selection options
+    if cli_args.audio_default_only and cli_args.audio_streams:
+        error("Cannot use both --audio-default-only and --audio-streams together.")
+
     ffmpeg_normalize = FFmpegNormalize(
         normalization_type=cli_args.normalization_type,
         target_level=cli_args.target_level,
@@ -586,6 +636,9 @@ def _split_options(opts: str) -> list[str]:
         dry_run=cli_args.dry_run,
         progress=cli_args.progress,
         replaygain=cli_args.replaygain,
+        audio_streams=audio_streams,
+        audio_default_only=cli_args.audio_default_only,
+        keep_other_audio=cli_args.keep_other_audio,
     )
 
     if cli_args.output and len(cli_args.input) > len(cli_args.output):
diff --git a/src/ffmpeg_normalize/_ffmpeg_normalize.py b/src/ffmpeg_normalize/_ffmpeg_normalize.py
index 883a3ea..6a33cea 100644
--- a/src/ffmpeg_normalize/_ffmpeg_normalize.py
+++ b/src/ffmpeg_normalize/_ffmpeg_normalize.py
@@ -84,6 +84,9 @@ class FFmpegNormalize:
         debug (bool, optional): Debug. Defaults to False.
         progress (bool, optional): Progress. Defaults to False.
         replaygain (bool, optional): Write ReplayGain tags without normalizing. Defaults to False.
+        audio_streams (list[int] | None, optional): List of audio stream indices to normalize. Defaults to None (all streams).
+        audio_default_only (bool, optional): Only normalize audio streams with default disposition. Defaults to False.
+        keep_other_audio (bool, optional): Keep non-selected audio streams in output (copy without normalization). Defaults to False.
 
     Raises:
         FFmpegNormalizeError: If the ffmpeg executable is not found or does not support the loudnorm filter.
@@ -124,6 +127,9 @@ def __init__(
         debug: bool = False,
         progress: bool = False,
         replaygain: bool = False,
+        audio_streams: list[int] | None = None,
+        audio_default_only: bool = False,
+        keep_other_audio: bool = False,
     ):
         self.ffmpeg_exe = get_ffmpeg_exe()
         self.has_loudnorm_capabilities = ffmpeg_has_loudnorm()
@@ -207,6 +213,11 @@ def __init__(
         self.progress = progress
         self.replaygain = replaygain
 
+        # Stream selection options
+        self.audio_streams = audio_streams
+        self.audio_default_only = audio_default_only
+        self.keep_other_audio = keep_other_audio
+
         if (
             self.audio_codec is None or "pcm" in self.audio_codec
         ) and self.output_format in PCM_INCOMPATIBLE_FORMATS:
@@ -221,6 +232,19 @@ def __init__(
                 "ReplayGain only works for EBU normalization type for now."
             )
 
+        # Validate stream selection options
+        if self.audio_streams is not None and self.audio_default_only:
+            raise FFmpegNormalizeError(
+                "Cannot use both audio_streams and audio_default_only together."
+            )
+
+        if self.keep_other_audio and self.keep_original_audio:
+            raise FFmpegNormalizeError(
+                "Cannot use both --keep-other-audio and --keep-original-audio together. "
+                "Use --keep-original-audio to keep all original streams alongside normalized ones, "
+                "or --keep-other-audio to keep only non-selected streams as passthrough."
+            )
+
         self.stats: list[LoudnessStatisticsWithMetadata] = []
         self.media_files: list[MediaFile] = []
         self.file_count = 0
diff --git a/src/ffmpeg_normalize/_media_file.py b/src/ffmpeg_normalize/_media_file.py
index 2e464a3..36db1d2 100644
--- a/src/ffmpeg_normalize/_media_file.py
+++ b/src/ffmpeg_normalize/_media_file.py
@@ -136,6 +136,18 @@ def parse_streams(self) -> None:
 
         output_lines = [line.strip() for line in output.split("\n")]
 
+        # First pass: parse disposition flags for each stream
+        stream_dispositions: dict[int, bool] = {}
+
+        for line in output_lines:
+            if line.startswith("Stream"):
+                if stream_id_match := re.search(r"#0:([\d]+)", line):
+                    stream_id = int(stream_id_match.group(1))
+                    # Check if (default) appears on the Stream line
+                    is_default = "(default)" in line
+                    stream_dispositions[stream_id] = is_default
+
+        # Second pass: parse stream information
         duration = None
         for line in output_lines:
             if "Duration" in line:
@@ -155,8 +167,12 @@ def parse_streams(self) -> None:
             else:
                 continue
 
+            is_default = stream_dispositions.get(stream_id, False)
+
             if "Audio" in line:
-                _logger.debug(f"Found audio stream at index {stream_id}")
+                _logger.debug(
+                    f"Found audio stream at index {stream_id} (default: {is_default})"
+                )
                 sample_rate_match = re.search(r"(\d+) Hz", line)
                 sample_rate = (
                     int(sample_rate_match.group(1)) if sample_rate_match else None
@@ -170,6 +186,7 @@ def parse_streams(self) -> None:
                     sample_rate,
                     bit_depth,
                     duration,
+                    is_default,
                 )
 
             elif "Video" in line:
@@ -201,6 +218,53 @@ def parse_streams(self) -> None:
             self.streams["video"] = {}
             self.streams["subtitle"] = {}
 
+    def _get_streams_to_normalize(self) -> list[AudioStream]:
+        """
+        Determine which audio streams to normalize based on configuration.
+
+        Returns:
+            list[AudioStream]: List of audio streams to normalize
+        """
+        all_audio_streams = list(self.streams["audio"].values())
+
+        if self.ffmpeg_normalize.audio_streams is not None:
+            # User specified specific stream indices
+            selected_streams = [
+                stream
+                for stream in all_audio_streams
+                if stream.stream_id in self.ffmpeg_normalize.audio_streams
+            ]
+            if not selected_streams:
+                _logger.warning(
+                    f"No audio streams found matching indices {self.ffmpeg_normalize.audio_streams}. "
+                    f"Available streams: {[s.stream_id for s in all_audio_streams]}"
+                )
+            else:
+                _logger.info(
+                    f"Normalizing selected audio streams: {[s.stream_id for s in selected_streams]}"
+                )
+            return selected_streams
+
+        elif self.ffmpeg_normalize.audio_default_only:
+            # Only normalize streams with default disposition
+            default_streams = [
+                stream for stream in all_audio_streams if stream.is_default
+            ]
+            if not default_streams:
+                _logger.warning(
+                    "No audio streams with 'default' disposition found. "
+                    f"Available streams: {[s.stream_id for s in all_audio_streams]}"
+                )
+            else:
+                _logger.info(
+                    f"Normalizing default audio streams: {[s.stream_id for s in default_streams]}"
+                )
+            return default_streams
+
+        else:
+            # Normalize all streams (default behavior)
+            return all_audio_streams
+
     def run_normalization(self) -> None:
         """
         Run the normalization process for this file.
@@ -400,7 +464,9 @@ def _first_pass(self) -> None:
         """
         _logger.debug(f"Parsing normalization info for {self.input_file}")
 
-        for index, audio_stream in enumerate(self.streams["audio"].values()):
+        streams_to_normalize = self._get_streams_to_normalize()
+
+        for index, audio_stream in enumerate(streams_to_normalize):
             if self.ffmpeg_normalize.normalization_type == "ebu":
                 fun = getattr(audio_stream, "parse_loudnorm_stats")
             else:
@@ -410,7 +476,7 @@ def _first_pass(self) -> None:
                 with tqdm(
                     total=100,
                     position=1,
-                    desc=f"Stream {index + 1}/{len(self.streams['audio'].values())}",
+                    desc=f"Stream {index + 1}/{len(streams_to_normalize)}",
                     bar_format=TQDM_BAR_FORMAT,
                 ) as pbar:
                     for progress in fun():
@@ -429,7 +495,9 @@ def _get_audio_filter_cmd(self) -> tuple[str, list[str]]:
         filter_chains = []
         output_labels = []
 
-        for audio_stream in self.streams["audio"].values():
+        streams_to_normalize = self._get_streams_to_normalize()
+
+        for audio_stream in streams_to_normalize:
             skip_normalization = False
             if self.ffmpeg_normalize.lower_only:
                 if self.ffmpeg_normalize.normalization_type == "ebu":
@@ -551,29 +619,66 @@ def _second_pass(self) -> Iterator[float]:
                         f"The chosen output extension {self.output_ext} does not support video/cover art. It will be disabled."
                     )
 
+        # Determine streams to normalize and passthrough
+        streams_to_normalize = self._get_streams_to_normalize()
+        all_audio_streams = list(self.streams["audio"].values())
+
+        # Determine which streams to passthrough
+        if self.ffmpeg_normalize.keep_other_audio and (
+            self.ffmpeg_normalize.audio_streams is not None
+            or self.ffmpeg_normalize.audio_default_only
+        ):
+            streams_to_passthrough = [
+                s for s in all_audio_streams if s not in streams_to_normalize
+            ]
+        else:
+            streams_to_passthrough = []
+
         # ... and map the output of the normalization filters
         for ol in output_labels:
             cmd.extend(["-map", ol])
 
-        # set audio codec (never copy)
-        if self.ffmpeg_normalize.audio_codec:
-            cmd.extend(["-c:a", self.ffmpeg_normalize.audio_codec])
-        else:
-            for index, (_, audio_stream) in enumerate(self.streams["audio"].items()):
-                cmd.extend([f"-c:a:{index}", audio_stream.get_pcm_codec()])
+        # ... and map passthrough audio streams (copy without normalization)
+        for stream in streams_to_passthrough:
+            cmd.extend(["-map", f"0:{stream.stream_id}"])
 
-        # other audio options (if any)
+        # Track output audio stream index for codec assignment
+        output_audio_idx = 0
+
+        # set audio codec for normalized streams
+        for audio_stream in streams_to_normalize:
+            if self.ffmpeg_normalize.audio_codec:
+                codec = self.ffmpeg_normalize.audio_codec
+            else:
+                codec = audio_stream.get_pcm_codec()
+            cmd.extend([f"-c:a:{output_audio_idx}", codec])
+            output_audio_idx += 1
+
+        # set audio codec for passthrough streams (always copy)
+        for _ in streams_to_passthrough:
+            cmd.extend([f"-c:a:{output_audio_idx}", "copy"])
+            output_audio_idx += 1
+
+        # other audio options (if any) - only apply to normalized streams
         if self.ffmpeg_normalize.audio_bitrate:
             if self.ffmpeg_normalize.audio_codec == "libvorbis":
                 # libvorbis takes just a "-b" option, for some reason
                 # https://github.com/slhck/ffmpeg-normalize/issues/277
                 cmd.extend(["-b", str(self.ffmpeg_normalize.audio_bitrate)])
             else:
-                cmd.extend(["-b:a", str(self.ffmpeg_normalize.audio_bitrate)])
+                # Only apply to normalized streams
+                for idx in range(len(streams_to_normalize)):
+                    cmd.extend(
+                        [f"-b:a:{idx}", str(self.ffmpeg_normalize.audio_bitrate)]
+                    )
         if self.ffmpeg_normalize.sample_rate:
-            cmd.extend(["-ar", str(self.ffmpeg_normalize.sample_rate)])
+            # Only apply to normalized streams
+            for idx in range(len(streams_to_normalize)):
+                cmd.extend([f"-ar:a:{idx}", str(self.ffmpeg_normalize.sample_rate)])
         if self.ffmpeg_normalize.audio_channels:
-            cmd.extend(["-ac", str(self.ffmpeg_normalize.audio_channels)])
+            # Only apply to normalized streams
+            for idx in range(len(streams_to_normalize)):
+                cmd.extend([f"-ac:a:{idx}", str(self.ffmpeg_normalize.audio_channels)])
 
         # ... and subtitles
         if not self.ffmpeg_normalize.subtitle_disable:
@@ -583,10 +688,11 @@ def _second_pass(self) -> Iterator[float]:
             cmd.extend(["-c:s", "copy"])
 
         if self.ffmpeg_normalize.keep_original_audio:
-            highest_index = len(self.streams["audio"])
+            # Map all original audio streams after normalized and passthrough streams
             for index, _ in enumerate(self.streams["audio"].items()):
                 cmd.extend(["-map", f"0:a:{index}"])
-                cmd.extend([f"-c:a:{highest_index + index}", "copy"])
+                cmd.extend([f"-c:a:{output_audio_idx}", "copy"])
+                output_audio_idx += 1
 
         # extra options (if any)
         if self.ffmpeg_normalize.extra_output_options:
@@ -645,13 +751,14 @@ def _second_pass(self) -> Iterator[float]:
             ebu_pass_2_stats = list(
                 AudioStream.prune_and_parse_loudnorm_output(output).values()
             )
-            # Only set second pass stats if they exist (they might not if all streams were skipped with --lower-only)
-            if len(ebu_pass_2_stats) == len(self.streams["audio"]):
-                for idx, audio_stream in enumerate(self.streams["audio"].values()):
+            # Only set second pass stats for streams that were actually normalized
+            streams_to_normalize = self._get_streams_to_normalize()
+            if len(ebu_pass_2_stats) == len(streams_to_normalize):
+                for idx, audio_stream in enumerate(streams_to_normalize):
                     audio_stream.set_second_pass_stats(ebu_pass_2_stats[idx])
             else:
                 _logger.debug(
-                    f"Expected {len(self.streams['audio'])} EBU pass 2 statistics but got {len(ebu_pass_2_stats)}. "
+                    f"Expected {len(streams_to_normalize)} EBU pass 2 statistics but got {len(ebu_pass_2_stats)}. "
                     "This can happen when normalization is skipped (e.g., with --lower-only)."
                 )
 
diff --git a/src/ffmpeg_normalize/_streams.py b/src/ffmpeg_normalize/_streams.py
index 592d704..12b1e77 100644
--- a/src/ffmpeg_normalize/_streams.py
+++ b/src/ffmpeg_normalize/_streams.py
@@ -99,6 +99,7 @@ def __init__(
         sample_rate: int | None,
         bit_depth: int | None,
         duration: float | None,
+        is_default: bool = False,
     ):
         """
         Create an AudioStream object.
@@ -110,6 +111,7 @@ def __init__(
             sample_rate (int): sample rate in Hz
             bit_depth (int): bit depth in bits
             duration (float): duration in seconds
+            is_default (bool): Whether this stream has the default disposition flag
         """
         super().__init__(ffmpeg_normalize, media_file, "audio", stream_id)
 
@@ -124,6 +126,7 @@ def __init__(
         self.bit_depth = bit_depth
 
         self.duration = duration
+        self.is_default = is_default
 
     @staticmethod
     def _constrain(
diff --git a/tests/test_all.py b/tests/test_all.py
index b50e399..b9c82c2 100644
--- a/tests/test_all.py
+++ b/tests/test_all.py
@@ -526,3 +526,76 @@ def test_replaygain(self):
             # git checkout the files!
             for file in REPLAYGAIN_FILES:
                 subprocess.run(["git", "checkout", file], check=False)
+
+    def test_audio_streams_single(self):
+        """Test normalizing only a single audio stream"""
+        ffmpeg_normalize_call(["tests/test.mp4", "-as", "1", "-nt", "ebu"])
+        assert os.path.isfile("normalized/test.mkv")
+        # Check that output has only 1 audio stream
+        streams = _get_stream_info("normalized/test.mkv")
+        audio_streams = [s for s in streams if s["codec_type"] == "audio"]
+        assert len(audio_streams) == 1
+        # Verify stats show only one stream was normalized
+        stats = _get_stats("normalized/test.mkv", "ebu")
+        assert len(stats) == 1
+        assert stats[0]["stream_id"] == 1
+
+    def test_audio_streams_multiple(self):
+        """Test normalizing multiple specific audio streams"""
+        ffmpeg_normalize_call(["tests/test.mp4", "-as", "1,2", "-nt", "ebu"])
+        assert os.path.isfile("normalized/test.mkv")
+        # Check that output has 2 audio streams
+        streams = _get_stream_info("normalized/test.mkv")
+        audio_streams = [s for s in streams if s["codec_type"] == "audio"]
+        assert len(audio_streams) == 2
+        # Verify stats show both streams were normalized
+        stats = _get_stats("normalized/test.mkv", "ebu")
+        assert len(stats) == 2
+
+    def test_audio_streams_with_keep_other(self):
+        """Test normalizing one stream while keeping others as passthrough"""
+        ffmpeg_normalize_call(
+            ["tests/test.mp4", "-as", "1", "--keep-other-audio", "-nt", "ebu"]
+        )
+        assert os.path.isfile("normalized/test.mkv")
+        # Check that output has 2 audio streams (1 normalized, 1 passthrough)
+        streams = _get_stream_info("normalized/test.mkv")
+        audio_streams = [s for s in streams if s["codec_type"] == "audio"]
+        assert len(audio_streams) == 2
+        # First audio stream should be normalized (PCM or similar)
+        assert "pcm" in audio_streams[0]["codec_name"]
+        # Second audio stream should be copied (ac3)
+        assert audio_streams[1]["codec_name"] == "ac3"
+
+    def test_audio_default_only(self):
+        """Test normalizing only default audio streams"""
+        # Note: test.mp4 has both audio streams marked as default
+        ffmpeg_normalize_call(["tests/test.mp4", "--audio-default-only", "-nt", "ebu"])
+        assert os.path.isfile("normalized/test.mkv")
+        # Since both streams are default, both should be normalized
+        streams = _get_stream_info("normalized/test.mkv")
+        audio_streams = [s for s in streams if s["codec_type"] == "audio"]
+        assert len(audio_streams) == 2
+        # Verify stats show both streams were normalized
+        stats = _get_stats("normalized/test.mkv", "ebu")
+        assert len(stats) == 2
+
+    def test_audio_streams_invalid_option_combination(self):
+        """Test that using both --audio-streams and --audio-default-only fails"""
+        _, stderr = ffmpeg_normalize_call(
+            ["tests/test.mp4", "-as", "1", "--audio-default-only"]
+        )
+        assert "Cannot use both" in stderr
+
+    def test_keep_other_and_keep_original_conflict(self):
+        """Test that using both --keep-other-audio and --keep-original-audio fails"""
+        _, stderr = ffmpeg_normalize_call(
+            [
+                "tests/test.mp4",
+                "-as",
+                "1",
+                "--keep-other-audio",
+                "--keep-original-audio",
+            ]
+        )
+        assert "Cannot use both" in stderr