From cda20d64e44d599b780aa945959a4d1271be26e3 Mon Sep 17 00:00:00 2001 From: Werner Robitza Date: Sat, 18 Oct 2025 20:52:32 +0200 Subject: [PATCH] feat: add selective audio stream normalization Add support for selective audio stream normalization, addressing issue #285. New CLI arguments: - `-as/--audio-streams`: Select specific audio streams by index (comma-separated) - `--audio-default-only`: Only normalize audio streams with 'default' disposition - `--keep-other-audio`: Keep non-selected streams as passthrough (unchanged) Implementation details: - Added `is_default` flag to AudioStream to track disposition - Updated MediaFile.parse_streams() to parse "(default)" flag from FFmpeg output - Added _get_streams_to_normalize() method to filter streams based on selection - Updated first pass, filter generation, and second pass for selected + passthrough streams - Proper output stream index tracking for correct codec assignment Validation: - Prevents using both `-as` and `--audio-default-only` together - Prevents using both `--keep-other-audio` and `--keep-original-audio` together Tests: - Added 6 new test cases covering all scenarios - All 46 tests passing (40 original + 6 new) Documentation: - Updated docs/usage/options.md with complete documentation for new options --- docs/usage/options.md | 26 ++++ src/ffmpeg_normalize/__main__.py | 53 ++++++++ src/ffmpeg_normalize/_ffmpeg_normalize.py | 24 ++++ src/ffmpeg_normalize/_media_file.py | 147 +++++++++++++++++++--- src/ffmpeg_normalize/_streams.py | 3 + tests/test_all.py | 73 +++++++++++ 6 files changed, 306 insertions(+), 20 deletions(-) diff --git a/docs/usage/options.md b/docs/usage/options.md index 9b1d85c..de98a2f 100644 --- a/docs/usage/options.md +++ b/docs/usage/options.md @@ -187,6 +187,32 @@ Set the number of audio channels. 
If not specified, the input channel layout wil Copy original, non-normalized audio streams to output file +## Audio Stream Selection + +### `-as AUDIO_STREAMS, --audio-streams AUDIO_STREAMS` + +Select specific audio streams to normalize by stream index (comma-separated). + +Example: `-as 1` normalizes only stream 1, `-as 1,2` normalizes streams 1 and 2. + +If not specified, all audio streams will be normalized (default behavior). + +### `--audio-default-only` + +Only normalize audio streams with the 'default' disposition flag. + +This is useful for files with multiple audio tracks where only the default track should be normalized (e.g., main audio track vs. commentary tracks). + +### `--keep-other-audio` + +Keep non-selected audio streams in the output file (copy without normalization). + +Only takes effect when combined with either `-as`/`--audio-streams` or `--audio-default-only`; without a stream selection it is ignored. + +Example: `ffmpeg-normalize input.mkv -as 1 --keep-other-audio` will normalize stream 1 and copy all other audio streams unchanged. + +**Note:** This option is mutually exclusive with `--keep-original-audio`. Use `--keep-original-audio` to keep all original streams alongside normalized ones, or `--keep-other-audio` to keep only non-selected streams as passthrough. + ### `-prf PRE_FILTER, --pre-filter PRE_FILTER` Add an audio filter chain before applying normalization. diff --git a/src/ffmpeg_normalize/__main__.py b/src/ffmpeg_normalize/__main__.py index 7a02c87..24d6977 100644 --- a/src/ffmpeg_normalize/__main__.py +++ b/src/ffmpeg_normalize/__main__.py @@ -315,6 +315,44 @@ def create_parser() -> argparse.ArgumentParser: ), ) + group_stream_selection = parser.add_argument_group("Audio Stream Selection") + group_stream_selection.add_argument( + "-as", + "--audio-streams", + type=str, + help=textwrap.dedent( + """\ + Select specific audio streams to normalize by stream index (comma-separated). + Example: --audio-streams 0,2 will normalize only streams 0 and 2.
+ + By default, all audio streams are normalized. + """ + ), + ) + group_stream_selection.add_argument( + "--audio-default-only", + action="store_true", + help=textwrap.dedent( + """\ + Only normalize audio streams with the 'default' disposition flag. + This is useful for files with multiple audio tracks where only the main track + should be normalized (e.g., keeping commentary tracks unchanged). + """ + ), + ) + group_stream_selection.add_argument( + "--keep-other-audio", + action="store_true", + help=textwrap.dedent( + """\ + Keep non-selected audio streams in the output file (copy without normalization). + Only applies when --audio-streams or --audio-default-only is used. + + By default, only selected streams are included in the output. + """ + ), + ) + group_acodec = parser.add_argument_group("Audio Encoding") group_acodec.add_argument( "-c:a", @@ -553,6 +591,18 @@ def _split_options(opts: str) -> list[str]: extra_input_options = _split_options(cli_args.extra_input_options) extra_output_options = _split_options(cli_args.extra_output_options) + # parse audio streams selection + audio_streams = None + if cli_args.audio_streams: + try: + audio_streams = [int(s.strip()) for s in cli_args.audio_streams.split(",")] + except ValueError: + error("Invalid audio stream indices. 
Must be comma-separated integers.") + + # validate stream selection options + if cli_args.audio_default_only and cli_args.audio_streams: + error("Cannot use both --audio-default-only and --audio-streams together.") + ffmpeg_normalize = FFmpegNormalize( normalization_type=cli_args.normalization_type, target_level=cli_args.target_level, @@ -586,6 +636,9 @@ def _split_options(opts: str) -> list[str]: dry_run=cli_args.dry_run, progress=cli_args.progress, replaygain=cli_args.replaygain, + audio_streams=audio_streams, + audio_default_only=cli_args.audio_default_only, + keep_other_audio=cli_args.keep_other_audio, ) if cli_args.output and len(cli_args.input) > len(cli_args.output): diff --git a/src/ffmpeg_normalize/_ffmpeg_normalize.py b/src/ffmpeg_normalize/_ffmpeg_normalize.py index 883a3ea..6a33cea 100644 --- a/src/ffmpeg_normalize/_ffmpeg_normalize.py +++ b/src/ffmpeg_normalize/_ffmpeg_normalize.py @@ -84,6 +84,9 @@ class FFmpegNormalize: debug (bool, optional): Debug. Defaults to False. progress (bool, optional): Progress. Defaults to False. replaygain (bool, optional): Write ReplayGain tags without normalizing. Defaults to False. + audio_streams (list[int] | None, optional): List of audio stream indices to normalize. Defaults to None (all streams). + audio_default_only (bool, optional): Only normalize audio streams with default disposition. Defaults to False. + keep_other_audio (bool, optional): Keep non-selected audio streams in output (copy without normalization). Defaults to False. Raises: FFmpegNormalizeError: If the ffmpeg executable is not found or does not support the loudnorm filter. 
@@ -124,6 +127,9 @@ def __init__( debug: bool = False, progress: bool = False, replaygain: bool = False, + audio_streams: list[int] | None = None, + audio_default_only: bool = False, + keep_other_audio: bool = False, ): self.ffmpeg_exe = get_ffmpeg_exe() self.has_loudnorm_capabilities = ffmpeg_has_loudnorm() @@ -207,6 +213,11 @@ def __init__( self.progress = progress self.replaygain = replaygain + # Stream selection options + self.audio_streams = audio_streams + self.audio_default_only = audio_default_only + self.keep_other_audio = keep_other_audio + if ( self.audio_codec is None or "pcm" in self.audio_codec ) and self.output_format in PCM_INCOMPATIBLE_FORMATS: @@ -221,6 +232,19 @@ def __init__( "ReplayGain only works for EBU normalization type for now." ) + # Validate stream selection options + if self.audio_streams is not None and self.audio_default_only: + raise FFmpegNormalizeError( + "Cannot use both audio_streams and audio_default_only together." + ) + + if self.keep_other_audio and self.keep_original_audio: + raise FFmpegNormalizeError( + "Cannot use both --keep-other-audio and --keep-original-audio together. " + "Use --keep-original-audio to keep all original streams alongside normalized ones, " + "or --keep-other-audio to keep only non-selected streams as passthrough." 
+ ) + self.stats: list[LoudnessStatisticsWithMetadata] = [] self.media_files: list[MediaFile] = [] self.file_count = 0 diff --git a/src/ffmpeg_normalize/_media_file.py b/src/ffmpeg_normalize/_media_file.py index 2e464a3..36db1d2 100644 --- a/src/ffmpeg_normalize/_media_file.py +++ b/src/ffmpeg_normalize/_media_file.py @@ -136,6 +136,18 @@ def parse_streams(self) -> None: output_lines = [line.strip() for line in output.split("\n")] + # First pass: parse disposition flags for each stream + stream_dispositions: dict[int, bool] = {} + + for line in output_lines: + if line.startswith("Stream"): + if stream_id_match := re.search(r"#0:([\d]+)", line): + stream_id = int(stream_id_match.group(1)) + # Check if (default) appears on the Stream line + is_default = "(default)" in line + stream_dispositions[stream_id] = is_default + + # Second pass: parse stream information duration = None for line in output_lines: if "Duration" in line: @@ -155,8 +167,12 @@ def parse_streams(self) -> None: else: continue + is_default = stream_dispositions.get(stream_id, False) + if "Audio" in line: - _logger.debug(f"Found audio stream at index {stream_id}") + _logger.debug( + f"Found audio stream at index {stream_id} (default: {is_default})" + ) sample_rate_match = re.search(r"(\d+) Hz", line) sample_rate = ( int(sample_rate_match.group(1)) if sample_rate_match else None @@ -170,6 +186,7 @@ def parse_streams(self) -> None: sample_rate, bit_depth, duration, + is_default, ) elif "Video" in line: @@ -201,6 +218,53 @@ def parse_streams(self) -> None: self.streams["video"] = {} self.streams["subtitle"] = {} + def _get_streams_to_normalize(self) -> list[AudioStream]: + """ + Determine which audio streams to normalize based on configuration. 
+ + Returns: + list[AudioStream]: List of audio streams to normalize + """ + all_audio_streams = list(self.streams["audio"].values()) + + if self.ffmpeg_normalize.audio_streams is not None: + # User specified specific stream indices + selected_streams = [ + stream + for stream in all_audio_streams + if stream.stream_id in self.ffmpeg_normalize.audio_streams + ] + if not selected_streams: + _logger.warning( + f"No audio streams found matching indices {self.ffmpeg_normalize.audio_streams}. " + f"Available streams: {[s.stream_id for s in all_audio_streams]}" + ) + else: + _logger.info( + f"Normalizing selected audio streams: {[s.stream_id for s in selected_streams]}" + ) + return selected_streams + + elif self.ffmpeg_normalize.audio_default_only: + # Only normalize streams with default disposition + default_streams = [ + stream for stream in all_audio_streams if stream.is_default + ] + if not default_streams: + _logger.warning( + "No audio streams with 'default' disposition found. " + f"Available streams: {[s.stream_id for s in all_audio_streams]}" + ) + else: + _logger.info( + f"Normalizing default audio streams: {[s.stream_id for s in default_streams]}" + ) + return default_streams + + else: + # Normalize all streams (default behavior) + return all_audio_streams + def run_normalization(self) -> None: """ Run the normalization process for this file. 
@@ -400,7 +464,9 @@ def _first_pass(self) -> None: """ _logger.debug(f"Parsing normalization info for {self.input_file}") - for index, audio_stream in enumerate(self.streams["audio"].values()): + streams_to_normalize = self._get_streams_to_normalize() + + for index, audio_stream in enumerate(streams_to_normalize): if self.ffmpeg_normalize.normalization_type == "ebu": fun = getattr(audio_stream, "parse_loudnorm_stats") else: @@ -410,7 +476,7 @@ def _first_pass(self) -> None: with tqdm( total=100, position=1, - desc=f"Stream {index + 1}/{len(self.streams['audio'].values())}", + desc=f"Stream {index + 1}/{len(streams_to_normalize)}", bar_format=TQDM_BAR_FORMAT, ) as pbar: for progress in fun(): @@ -429,7 +495,9 @@ def _get_audio_filter_cmd(self) -> tuple[str, list[str]]: filter_chains = [] output_labels = [] - for audio_stream in self.streams["audio"].values(): + streams_to_normalize = self._get_streams_to_normalize() + + for audio_stream in streams_to_normalize: skip_normalization = False if self.ffmpeg_normalize.lower_only: if self.ffmpeg_normalize.normalization_type == "ebu": @@ -551,29 +619,66 @@ def _second_pass(self) -> Iterator[float]: f"The chosen output extension {self.output_ext} does not support video/cover art. It will be disabled." ) + # Determine streams to normalize and passthrough + streams_to_normalize = self._get_streams_to_normalize() + all_audio_streams = list(self.streams["audio"].values()) + + # Determine which streams to passthrough + if self.ffmpeg_normalize.keep_other_audio and ( + self.ffmpeg_normalize.audio_streams is not None + or self.ffmpeg_normalize.audio_default_only + ): + streams_to_passthrough = [ + s for s in all_audio_streams if s not in streams_to_normalize + ] + else: + streams_to_passthrough = [] + # ... 
and map the output of the normalization filters for ol in output_labels: cmd.extend(["-map", ol]) - # set audio codec (never copy) - if self.ffmpeg_normalize.audio_codec: - cmd.extend(["-c:a", self.ffmpeg_normalize.audio_codec]) - else: - for index, (_, audio_stream) in enumerate(self.streams["audio"].items()): - cmd.extend([f"-c:a:{index}", audio_stream.get_pcm_codec()]) + # ... and map passthrough audio streams (copy without normalization) + for stream in streams_to_passthrough: + cmd.extend(["-map", f"0:{stream.stream_id}"]) - # other audio options (if any) + # Track output audio stream index for codec assignment + output_audio_idx = 0 + + # set audio codec for normalized streams + for audio_stream in streams_to_normalize: + if self.ffmpeg_normalize.audio_codec: + codec = self.ffmpeg_normalize.audio_codec + else: + codec = audio_stream.get_pcm_codec() + cmd.extend([f"-c:a:{output_audio_idx}", codec]) + output_audio_idx += 1 + + # set audio codec for passthrough streams (always copy) + for _ in streams_to_passthrough: + cmd.extend([f"-c:a:{output_audio_idx}", "copy"]) + output_audio_idx += 1 + + # other audio options (if any) - only apply to normalized streams if self.ffmpeg_normalize.audio_bitrate: if self.ffmpeg_normalize.audio_codec == "libvorbis": # libvorbis takes just a "-b" option, for some reason # https://github.com/slhck/ffmpeg-normalize/issues/277 cmd.extend(["-b", str(self.ffmpeg_normalize.audio_bitrate)]) else: - cmd.extend(["-b:a", str(self.ffmpeg_normalize.audio_bitrate)]) + # Only apply to normalized streams + for idx in range(len(streams_to_normalize)): + cmd.extend( + [f"-b:a:{idx}", str(self.ffmpeg_normalize.audio_bitrate)] + ) if self.ffmpeg_normalize.sample_rate: - cmd.extend(["-ar", str(self.ffmpeg_normalize.sample_rate)]) + # Only apply to normalized streams + for idx in range(len(streams_to_normalize)): + cmd.extend([f"-ar:a:{idx}", str(self.ffmpeg_normalize.sample_rate)]) if self.ffmpeg_normalize.audio_channels: - cmd.extend(["-ac", 
str(self.ffmpeg_normalize.audio_channels)]) + # Only apply to normalized streams + for idx in range(len(streams_to_normalize)): + cmd.extend([f"-ac:a:{idx}", str(self.ffmpeg_normalize.audio_channels)]) # ... and subtitles if not self.ffmpeg_normalize.subtitle_disable: @@ -583,10 +688,11 @@ def _second_pass(self) -> Iterator[float]: cmd.extend(["-c:s", "copy"]) if self.ffmpeg_normalize.keep_original_audio: - highest_index = len(self.streams["audio"]) + # Map all original audio streams after normalized and passthrough streams for index, _ in enumerate(self.streams["audio"].items()): cmd.extend(["-map", f"0:a:{index}"]) - cmd.extend([f"-c:a:{highest_index + index}", "copy"]) + cmd.extend([f"-c:a:{output_audio_idx}", "copy"]) + output_audio_idx += 1 # extra options (if any) if self.ffmpeg_normalize.extra_output_options: @@ -645,13 +751,14 @@ def _second_pass(self) -> Iterator[float]: ebu_pass_2_stats = list( AudioStream.prune_and_parse_loudnorm_output(output).values() ) - # Only set second pass stats if they exist (they might not if all streams were skipped with --lower-only) - if len(ebu_pass_2_stats) == len(self.streams["audio"]): - for idx, audio_stream in enumerate(self.streams["audio"].values()): + # Only set second pass stats for streams that were actually normalized + streams_to_normalize = self._get_streams_to_normalize() + if len(ebu_pass_2_stats) == len(streams_to_normalize): + for idx, audio_stream in enumerate(streams_to_normalize): audio_stream.set_second_pass_stats(ebu_pass_2_stats[idx]) else: _logger.debug( - f"Expected {len(self.streams['audio'])} EBU pass 2 statistics but got {len(ebu_pass_2_stats)}. " + f"Expected {len(streams_to_normalize)} EBU pass 2 statistics but got {len(ebu_pass_2_stats)}. " "This can happen when normalization is skipped (e.g., with --lower-only)." 
) diff --git a/src/ffmpeg_normalize/_streams.py b/src/ffmpeg_normalize/_streams.py index 592d704..12b1e77 100644 --- a/src/ffmpeg_normalize/_streams.py +++ b/src/ffmpeg_normalize/_streams.py @@ -99,6 +99,7 @@ def __init__( sample_rate: int | None, bit_depth: int | None, duration: float | None, + is_default: bool = False, ): """ Create an AudioStream object. @@ -110,6 +111,7 @@ def __init__( sample_rate (int): sample rate in Hz bit_depth (int): bit depth in bits duration (float): duration in seconds + is_default (bool): Whether this stream has the default disposition flag """ super().__init__(ffmpeg_normalize, media_file, "audio", stream_id) @@ -124,6 +126,7 @@ def __init__( self.bit_depth = bit_depth self.duration = duration + self.is_default = is_default @staticmethod def _constrain( diff --git a/tests/test_all.py b/tests/test_all.py index b50e399..b9c82c2 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -526,3 +526,76 @@ def test_replaygain(self): # git checkout the files! for file in REPLAYGAIN_FILES: subprocess.run(["git", "checkout", file], check=False) + + def test_audio_streams_single(self): + """Test normalizing only a single audio stream""" + ffmpeg_normalize_call(["tests/test.mp4", "-as", "1", "-nt", "ebu"]) + assert os.path.isfile("normalized/test.mkv") + # Check that output has only 1 audio stream + streams = _get_stream_info("normalized/test.mkv") + audio_streams = [s for s in streams if s["codec_type"] == "audio"] + assert len(audio_streams) == 1 + # Verify stats show only one stream was normalized + stats = _get_stats("normalized/test.mkv", "ebu") + assert len(stats) == 1 + assert stats[0]["stream_id"] == 1 + + def test_audio_streams_multiple(self): + """Test normalizing multiple specific audio streams""" + ffmpeg_normalize_call(["tests/test.mp4", "-as", "1,2", "-nt", "ebu"]) + assert os.path.isfile("normalized/test.mkv") + # Check that output has 2 audio streams + streams = _get_stream_info("normalized/test.mkv") + audio_streams = [s for s 
in streams if s["codec_type"] == "audio"] + assert len(audio_streams) == 2 + # Verify stats show both streams were normalized + stats = _get_stats("normalized/test.mkv", "ebu") + assert len(stats) == 2 + + def test_audio_streams_with_keep_other(self): + """Test normalizing one stream while keeping others as passthrough""" + ffmpeg_normalize_call( + ["tests/test.mp4", "-as", "1", "--keep-other-audio", "-nt", "ebu"] + ) + assert os.path.isfile("normalized/test.mkv") + # Check that output has 2 audio streams (1 normalized, 1 passthrough) + streams = _get_stream_info("normalized/test.mkv") + audio_streams = [s for s in streams if s["codec_type"] == "audio"] + assert len(audio_streams) == 2 + # First audio stream should be normalized (PCM or similar) + assert "pcm" in audio_streams[0]["codec_name"] + # Second audio stream should be copied (ac3) + assert audio_streams[1]["codec_name"] == "ac3" + + def test_audio_default_only(self): + """Test normalizing only default audio streams""" + # Note: test.mp4 has both audio streams marked as default + ffmpeg_normalize_call(["tests/test.mp4", "--audio-default-only", "-nt", "ebu"]) + assert os.path.isfile("normalized/test.mkv") + # Since both streams are default, both should be normalized + streams = _get_stream_info("normalized/test.mkv") + audio_streams = [s for s in streams if s["codec_type"] == "audio"] + assert len(audio_streams) == 2 + # Verify stats show both streams were normalized + stats = _get_stats("normalized/test.mkv", "ebu") + assert len(stats) == 2 + + def test_audio_streams_invalid_option_combination(self): + """Test that using both --audio-streams and --audio-default-only fails""" + _, stderr = ffmpeg_normalize_call( + ["tests/test.mp4", "-as", "1", "--audio-default-only"] + ) + assert "Cannot use both" in stderr + + def test_keep_other_and_keep_original_conflict(self): + """Test that using both --keep-other-audio and --keep-original-audio fails""" + _, stderr = ffmpeg_normalize_call( + [ + "tests/test.mp4", + 
"-as", + "1", + "--keep-other-audio", + "--keep-original-audio", + ] + ) + assert "Cannot use both" in stderr