Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions docs/usage/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,32 @@ Set the number of audio channels. If not specified, the input channel layout wil

Copy original, non-normalized audio streams to output file

## Audio Stream Selection

### `-as AUDIO_STREAMS, --audio-streams AUDIO_STREAMS`

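Select specific audio streams to normalize by stream index (comma-separated). The index is the stream's overall number in the input file as reported by ffmpeg (e.g. `1` for `Stream #0:1`), which counts video and subtitle streams too — it is not the position among audio streams only.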

Example: `-as 1` normalizes only stream 1, `-as 1,2` normalizes streams 1 and 2.

If not specified, all audio streams will be normalized (default behavior).

### `--audio-default-only`

Only normalize audio streams with the 'default' disposition flag.

This is useful for files with multiple audio tracks where only the default track should be normalized (e.g., main audio track vs. commentary tracks).

### `--keep-other-audio`

Keep non-selected audio streams in the output file (copy without normalization).

Only takes effect when combined with either `-as`/`--audio-streams` or `--audio-default-only`; without one of them the option is ignored.

Example: `ffmpeg-normalize input.mkv -as 1 --keep-other-audio` will normalize stream 1 and copy all other audio streams unchanged.

**Note:** This option is mutually exclusive with `--keep-original-audio`. Use `--keep-original-audio` to keep all original streams alongside normalized ones, or `--keep-other-audio` to keep only non-selected streams as passthrough.

### `-prf PRE_FILTER, --pre-filter PRE_FILTER`

Add an audio filter chain before applying normalization.
Expand Down
53 changes: 53 additions & 0 deletions src/ffmpeg_normalize/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,44 @@ def create_parser() -> argparse.ArgumentParser:
),
)

group_stream_selection = parser.add_argument_group("Audio Stream Selection")
group_stream_selection.add_argument(
"-as",
"--audio-streams",
type=str,
help=textwrap.dedent(
"""\
Select specific audio streams to normalize by stream index (comma-separated).
Example: --audio-streams 0,2 will normalize only streams 0 and 2.

By default, all audio streams are normalized.
"""
),
)
group_stream_selection.add_argument(
"--audio-default-only",
action="store_true",
help=textwrap.dedent(
"""\
Only normalize audio streams with the 'default' disposition flag.
This is useful for files with multiple audio tracks where only the main track
should be normalized (e.g., keeping commentary tracks unchanged).
"""
),
)
group_stream_selection.add_argument(
"--keep-other-audio",
action="store_true",
help=textwrap.dedent(
"""\
Keep non-selected audio streams in the output file (copy without normalization).
Only applies when --audio-streams or --audio-default-only is used.

By default, only selected streams are included in the output.
"""
),
)

group_acodec = parser.add_argument_group("Audio Encoding")
group_acodec.add_argument(
"-c:a",
Expand Down Expand Up @@ -553,6 +591,18 @@ def _split_options(opts: str) -> list[str]:
extra_input_options = _split_options(cli_args.extra_input_options)
extra_output_options = _split_options(cli_args.extra_output_options)

# parse audio streams selection
audio_streams = None
if cli_args.audio_streams:
try:
audio_streams = [int(s.strip()) for s in cli_args.audio_streams.split(",")]
except ValueError:
error("Invalid audio stream indices. Must be comma-separated integers.")

# validate stream selection options
if cli_args.audio_default_only and cli_args.audio_streams:
error("Cannot use both --audio-default-only and --audio-streams together.")

ffmpeg_normalize = FFmpegNormalize(
normalization_type=cli_args.normalization_type,
target_level=cli_args.target_level,
Expand Down Expand Up @@ -586,6 +636,9 @@ def _split_options(opts: str) -> list[str]:
dry_run=cli_args.dry_run,
progress=cli_args.progress,
replaygain=cli_args.replaygain,
audio_streams=audio_streams,
audio_default_only=cli_args.audio_default_only,
keep_other_audio=cli_args.keep_other_audio,
)

if cli_args.output and len(cli_args.input) > len(cli_args.output):
Expand Down
24 changes: 24 additions & 0 deletions src/ffmpeg_normalize/_ffmpeg_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ class FFmpegNormalize:
debug (bool, optional): Debug. Defaults to False.
progress (bool, optional): Progress. Defaults to False.
replaygain (bool, optional): Write ReplayGain tags without normalizing. Defaults to False.
audio_streams (list[int] | None, optional): List of audio stream indices to normalize. Defaults to None (all streams).
audio_default_only (bool, optional): Only normalize audio streams with default disposition. Defaults to False.
keep_other_audio (bool, optional): Keep non-selected audio streams in output (copy without normalization). Defaults to False.

Raises:
FFmpegNormalizeError: If the ffmpeg executable is not found or does not support the loudnorm filter.
Expand Down Expand Up @@ -124,6 +127,9 @@ def __init__(
debug: bool = False,
progress: bool = False,
replaygain: bool = False,
audio_streams: list[int] | None = None,
audio_default_only: bool = False,
keep_other_audio: bool = False,
):
self.ffmpeg_exe = get_ffmpeg_exe()
self.has_loudnorm_capabilities = ffmpeg_has_loudnorm()
Expand Down Expand Up @@ -207,6 +213,11 @@ def __init__(
self.progress = progress
self.replaygain = replaygain

# Stream selection options
self.audio_streams = audio_streams
self.audio_default_only = audio_default_only
self.keep_other_audio = keep_other_audio

if (
self.audio_codec is None or "pcm" in self.audio_codec
) and self.output_format in PCM_INCOMPATIBLE_FORMATS:
Expand All @@ -221,6 +232,19 @@ def __init__(
"ReplayGain only works for EBU normalization type for now."
)

# Validate stream selection options
if self.audio_streams is not None and self.audio_default_only:
raise FFmpegNormalizeError(
"Cannot use both audio_streams and audio_default_only together."
)

if self.keep_other_audio and self.keep_original_audio:
raise FFmpegNormalizeError(
"Cannot use both --keep-other-audio and --keep-original-audio together. "
"Use --keep-original-audio to keep all original streams alongside normalized ones, "
"or --keep-other-audio to keep only non-selected streams as passthrough."
)

self.stats: list[LoudnessStatisticsWithMetadata] = []
self.media_files: list[MediaFile] = []
self.file_count = 0
Expand Down
147 changes: 127 additions & 20 deletions src/ffmpeg_normalize/_media_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,18 @@ def parse_streams(self) -> None:

output_lines = [line.strip() for line in output.split("\n")]

# First pass: parse disposition flags for each stream
stream_dispositions: dict[int, bool] = {}

for line in output_lines:
if line.startswith("Stream"):
if stream_id_match := re.search(r"#0:([\d]+)", line):
stream_id = int(stream_id_match.group(1))
# Check if (default) appears on the Stream line
is_default = "(default)" in line
stream_dispositions[stream_id] = is_default

# Second pass: parse stream information
duration = None
for line in output_lines:
if "Duration" in line:
Expand All @@ -155,8 +167,12 @@ def parse_streams(self) -> None:
else:
continue

is_default = stream_dispositions.get(stream_id, False)

if "Audio" in line:
_logger.debug(f"Found audio stream at index {stream_id}")
_logger.debug(
f"Found audio stream at index {stream_id} (default: {is_default})"
)
sample_rate_match = re.search(r"(\d+) Hz", line)
sample_rate = (
int(sample_rate_match.group(1)) if sample_rate_match else None
Expand All @@ -170,6 +186,7 @@ def parse_streams(self) -> None:
sample_rate,
bit_depth,
duration,
is_default,
)

elif "Video" in line:
Expand Down Expand Up @@ -201,6 +218,53 @@ def parse_streams(self) -> None:
self.streams["video"] = {}
self.streams["subtitle"] = {}

def _get_streams_to_normalize(self) -> list[AudioStream]:
    """
    Determine which audio streams to normalize based on configuration.

    Selection precedence:

    1. ``ffmpeg_normalize.audio_streams`` is not None: keep only the audio
       streams whose ``stream_id`` is among the requested indices.
    2. ``ffmpeg_normalize.audio_default_only`` is set: keep only the audio
       streams whose ``is_default`` flag is true.
    3. Otherwise: every audio stream of this file.

    A warning is logged when the selection matches nothing, and also when
    only some of the explicitly requested indices exist, so that a typo in
    the index list does not go unnoticed.

    Returns:
        list[AudioStream]: List of audio streams to normalize, in the order
            they appear in ``self.streams["audio"]``. May be empty.
    """
    all_audio_streams = list(self.streams["audio"].values())
    available_ids = [s.stream_id for s in all_audio_streams]

    requested = self.ffmpeg_normalize.audio_streams
    if requested is not None:
        # User specified specific stream indices
        requested_ids = set(requested)
        selected_streams = [
            stream
            for stream in all_audio_streams
            if stream.stream_id in requested_ids
        ]
        if not selected_streams:
            _logger.warning(
                f"No audio streams found matching indices {requested}. "
                f"Available streams: {available_ids}"
            )
            return selected_streams

        # Some indices matched; report the ones that did not instead of
        # silently dropping them.
        missing_ids = sorted(requested_ids.difference(available_ids))
        if missing_ids:
            _logger.warning(
                f"Requested audio stream indices {missing_ids} were not found and will be ignored. "
                f"Available streams: {available_ids}"
            )
        _logger.info(
            f"Normalizing selected audio streams: {[s.stream_id for s in selected_streams]}"
        )
        return selected_streams

    if self.ffmpeg_normalize.audio_default_only:
        # Only normalize streams with default disposition
        default_streams = [
            stream for stream in all_audio_streams if stream.is_default
        ]
        if not default_streams:
            _logger.warning(
                "No audio streams with 'default' disposition found. "
                f"Available streams: {available_ids}"
            )
        else:
            _logger.info(
                f"Normalizing default audio streams: {[s.stream_id for s in default_streams]}"
            )
        return default_streams

    # Normalize all streams (default behavior)
    return all_audio_streams

def run_normalization(self) -> None:
"""
Run the normalization process for this file.
Expand Down Expand Up @@ -400,7 +464,9 @@ def _first_pass(self) -> None:
"""
_logger.debug(f"Parsing normalization info for {self.input_file}")

for index, audio_stream in enumerate(self.streams["audio"].values()):
streams_to_normalize = self._get_streams_to_normalize()

for index, audio_stream in enumerate(streams_to_normalize):
if self.ffmpeg_normalize.normalization_type == "ebu":
fun = getattr(audio_stream, "parse_loudnorm_stats")
else:
Expand All @@ -410,7 +476,7 @@ def _first_pass(self) -> None:
with tqdm(
total=100,
position=1,
desc=f"Stream {index + 1}/{len(self.streams['audio'].values())}",
desc=f"Stream {index + 1}/{len(streams_to_normalize)}",
bar_format=TQDM_BAR_FORMAT,
) as pbar:
for progress in fun():
Expand All @@ -429,7 +495,9 @@ def _get_audio_filter_cmd(self) -> tuple[str, list[str]]:
filter_chains = []
output_labels = []

for audio_stream in self.streams["audio"].values():
streams_to_normalize = self._get_streams_to_normalize()

for audio_stream in streams_to_normalize:
skip_normalization = False
if self.ffmpeg_normalize.lower_only:
if self.ffmpeg_normalize.normalization_type == "ebu":
Expand Down Expand Up @@ -551,29 +619,66 @@ def _second_pass(self) -> Iterator[float]:
f"The chosen output extension {self.output_ext} does not support video/cover art. It will be disabled."
)

# Determine streams to normalize and passthrough
streams_to_normalize = self._get_streams_to_normalize()
all_audio_streams = list(self.streams["audio"].values())

# Determine which streams to passthrough
if self.ffmpeg_normalize.keep_other_audio and (
self.ffmpeg_normalize.audio_streams is not None
or self.ffmpeg_normalize.audio_default_only
):
streams_to_passthrough = [
s for s in all_audio_streams if s not in streams_to_normalize
]
else:
streams_to_passthrough = []

# ... and map the output of the normalization filters
for ol in output_labels:
cmd.extend(["-map", ol])

# set audio codec (never copy)
if self.ffmpeg_normalize.audio_codec:
cmd.extend(["-c:a", self.ffmpeg_normalize.audio_codec])
else:
for index, (_, audio_stream) in enumerate(self.streams["audio"].items()):
cmd.extend([f"-c:a:{index}", audio_stream.get_pcm_codec()])
# ... and map passthrough audio streams (copy without normalization)
for stream in streams_to_passthrough:
cmd.extend(["-map", f"0:{stream.stream_id}"])

# other audio options (if any)
# Track output audio stream index for codec assignment
output_audio_idx = 0

# set audio codec for normalized streams
for audio_stream in streams_to_normalize:
if self.ffmpeg_normalize.audio_codec:
codec = self.ffmpeg_normalize.audio_codec
else:
codec = audio_stream.get_pcm_codec()
cmd.extend([f"-c:a:{output_audio_idx}", codec])
output_audio_idx += 1

# set audio codec for passthrough streams (always copy)
for _ in streams_to_passthrough:
cmd.extend([f"-c:a:{output_audio_idx}", "copy"])
output_audio_idx += 1

# other audio options (if any) - only apply to normalized streams
if self.ffmpeg_normalize.audio_bitrate:
if self.ffmpeg_normalize.audio_codec == "libvorbis":
# libvorbis takes just a "-b" option, for some reason
# https://github.com/slhck/ffmpeg-normalize/issues/277
cmd.extend(["-b", str(self.ffmpeg_normalize.audio_bitrate)])
else:
cmd.extend(["-b:a", str(self.ffmpeg_normalize.audio_bitrate)])
# Only apply to normalized streams
for idx in range(len(streams_to_normalize)):
cmd.extend(
[f"-b:a:{idx}", str(self.ffmpeg_normalize.audio_bitrate)]
)
if self.ffmpeg_normalize.sample_rate:
cmd.extend(["-ar", str(self.ffmpeg_normalize.sample_rate)])
# Only apply to normalized streams
for idx in range(len(streams_to_normalize)):
cmd.extend([f"-ar:a:{idx}", str(self.ffmpeg_normalize.sample_rate)])
if self.ffmpeg_normalize.audio_channels:
cmd.extend(["-ac", str(self.ffmpeg_normalize.audio_channels)])
# Only apply to normalized streams
for idx in range(len(streams_to_normalize)):
cmd.extend([f"-ac:a:{idx}", str(self.ffmpeg_normalize.audio_channels)])

# ... and subtitles
if not self.ffmpeg_normalize.subtitle_disable:
Expand All @@ -583,10 +688,11 @@ def _second_pass(self) -> Iterator[float]:
cmd.extend(["-c:s", "copy"])

if self.ffmpeg_normalize.keep_original_audio:
highest_index = len(self.streams["audio"])
# Map all original audio streams after normalized and passthrough streams
for index, _ in enumerate(self.streams["audio"].items()):
cmd.extend(["-map", f"0:a:{index}"])
cmd.extend([f"-c:a:{highest_index + index}", "copy"])
cmd.extend([f"-c:a:{output_audio_idx}", "copy"])
output_audio_idx += 1

# extra options (if any)
if self.ffmpeg_normalize.extra_output_options:
Expand Down Expand Up @@ -645,13 +751,14 @@ def _second_pass(self) -> Iterator[float]:
ebu_pass_2_stats = list(
AudioStream.prune_and_parse_loudnorm_output(output).values()
)
# Only set second pass stats if they exist (they might not if all streams were skipped with --lower-only)
if len(ebu_pass_2_stats) == len(self.streams["audio"]):
for idx, audio_stream in enumerate(self.streams["audio"].values()):
# Only set second pass stats for streams that were actually normalized
streams_to_normalize = self._get_streams_to_normalize()
if len(ebu_pass_2_stats) == len(streams_to_normalize):
for idx, audio_stream in enumerate(streams_to_normalize):
audio_stream.set_second_pass_stats(ebu_pass_2_stats[idx])
else:
_logger.debug(
f"Expected {len(self.streams['audio'])} EBU pass 2 statistics but got {len(ebu_pass_2_stats)}. "
f"Expected {len(streams_to_normalize)} EBU pass 2 statistics but got {len(ebu_pass_2_stats)}. "
"This can happen when normalization is skipped (e.g., with --lower-only)."
)

Expand Down
Loading