From 06021b855d6bc1c65140c977705617240e74cf65 Mon Sep 17 00:00:00 2001 From: takenori-y Date: Tue, 16 Dec 2025 22:34:57 +0900 Subject: [PATCH 1/9] add new mode in mlsadf --- diffsptk/modules/mglsadf.py | 235 ++++++++++++++++++++++++++++++++++-- pyproject.toml | 1 + tests/test_mglsadf.py | 52 +++++++- tests/utils.py | 11 +- 4 files changed, 286 insertions(+), 13 deletions(-) diff --git a/diffsptk/modules/mglsadf.py b/diffsptk/modules/mglsadf.py index 38caffc..475de82 100644 --- a/diffsptk/modules/mglsadf.py +++ b/diffsptk/modules/mglsadf.py @@ -16,21 +16,24 @@ from typing import Any +import mpmath as mp import numpy as np import torch import torch.nn.functional as F from torch import nn -from ..utils.private import Lambda, check_size, get_gamma, remove_gain +from ..utils.private import Lambda, check_size, get_gamma, remove_gain, to from .b2mc import MLSADigitalFilterCoefficientsToMelCepstrum from .base import BaseNonFunctionalModule from .c2mpir import CepstrumToMinimumPhaseImpulseResponse +from .frame import Frame from .gnorm import GeneralizedCepstrumGainNormalization from .istft import InverseShortTimeFourierTransform from .linear_intpl import LinearInterpolation from .mc2b import MelCepstrumToMLSADigitalFilterCoefficients from .mgc2mgc import MelGeneralizedCepstrumToMelGeneralizedCepstrum from .mgc2sp import MelGeneralizedCepstrumToSpectrum +from .root_pol import PolynomialToRoots from .stft import ShortTimeFourierTransform @@ -74,12 +77,13 @@ class PseudoMGLSADigitalFilter(BaseNonFunctionalModule): phase : ['minimum', 'maximum', 'zero', 'mixed'] The filter type. - mode : ['multi-stage', 'single-stage', 'freq-domain'] + mode : ['multi-stage', 'single-stage', 'freq-domain', 'pade-approx'] 'multi-stage' approximates the MLSA filter by cascading FIR filters based on the Taylor series expansion. 'single-stage' uses an FIR filter with the coefficients derived from the impulse response converted from the input mel-cepstral coefficients using FFT. 'freq-domain' performs filtering in the frequency domain - rather than the time domain. + rather than the time domain. 'pade-approx' implements the MLSA filter by + cascading all-zero and all-pole filters based on the factorization. n_fft : int >= 1 The number of FFT bins used for conversion. Higher values result in increased @@ -89,12 +93,28 @@ class PseudoMGLSADigitalFilter(BaseNonFunctionalModule): The order of the Taylor series expansion (valid only if **mode** is 'multi-stage'). + pade_order : int >= 4 + The order of Pade approximation (valid only if **mode** is 'pade-approx'). + cep_order : int >= 0 or tuple[int, int] - The order of the linear cepstrum (valid only if **mode** is 'multi-stage'). + The order of the linear cepstrum (valid only if **mode** is 'multi-stage' or + 'pade-approx'). ir_length : int >= 1 or tuple[int, int] The length of the impulse response (valid only if **mode** is 'single-stage'). + chunk_length : int >= 1 or None + The chunk length for processing. If None is given, chunking is not applied + (valid only if **mode** is 'pade-approx') + + warmup_length : int >= 0 or None + The warm-up length for chunk processing. This is required to reduce artifacts + caused by chunking (valid only if **mode** is 'pade-approx'). + + learnable : bool + If True, the polynomial coefficients used in the approximation are learnable + (valid only if **mode** is 'multi-stage' or 'pade-approx'). + device : torch.device or None The device of this module. @@ -181,6 +201,16 @@ def flip(x): phase=phase, **modified_kwargs, ) + elif mode == "pade-approx": + self.mglsadf = MultiStageIIRFilter( + flipped_filter_order, + frame_period, + alpha=alpha, + gamma=gamma, + ignore_gain=ignore_gain, + phase=phase, + **modified_kwargs, + ) else: raise ValueError(f"mode {mode} is not supported.") @@ -238,6 +268,7 @@ def __init__( taylor_order: int = 20, cep_order: tuple[int, int] | int = 199, n_fft: int = 512, + learnable: bool = False, device: torch.device | None = None, dtype: torch.dtype | None = None, ) -> None: @@ -248,7 +279,6 @@ def __init__( self.ignore_gain = ignore_gain self.phase = phase - self.taylor_order = taylor_order if alpha == 0 and gamma == 0: cep_order = filter_order @@ -294,6 +324,19 @@ def __init__( self.linear_intpl = LinearInterpolation(frame_period) + cp = mp.taylor(mp.exp, 0, taylor_order) + cp = np.array([float(x) for x in cp]) + weights = cp[1:] / cp[:-1] + weights = np.insert(weights, 0, 1) + self.register_buffer("weights", to(weights, device=device, dtype=dtype)) + + a = np.ones(taylor_order + 1) + a = to(a, device=device, dtype=dtype) + if learnable: + self.a = nn.Parameter(a) + else: + self.register_buffer("a", a) + def forward( self, x: torch.Tensor, @@ -322,12 +365,12 @@ def forward( c = self.linear_intpl(c) - y = x.clone() - for a in range(1, self.taylor_order + 1): + y = x * self.a[0] + for i in range(1, len(self.a)): x = self.pad(x) x = x.unfold(-1, c.size(-1), 1) - x = (x * c).sum(-1) / a - y += x + x = (x * c).sum(-1) * self.weights[i] + y += x * self.a[i] if not self.ignore_gain: K = torch.exp(self.linear_intpl(c0)) @@ -586,3 +629,177 @@ def forward( Y = H * X y = self.istft(Y, out_length=x.size(-1)) return y + + +class MultiStageIIRFilter(nn.Module): + def __init__( + self, + filter_order: tuple[int, int] | int, + frame_period: int, + *, + alpha: float = 0, + gamma: float = 0, + ignore_gain: bool = False, + phase: str = "minimum", + pade_order: int = 5, + cep_order: tuple[int, int] | int = 199, + n_fft: int = 512, + chunk_length: int | None = None, + warmup_length: int | None = None, + learnable: bool = False, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> None: + super().__init__() + + if phase != "minimum": + raise ValueError("Only minimum-phase filter is supported.") + if pade_order <= 3 or 15 <= pade_order: + raise ValueError("pade_order must be in [4, 14].") + + self.ignore_gain = ignore_gain + + self.mgc2c = MelGeneralizedCepstrumToMelGeneralizedCepstrum( + filter_order, + cep_order, + in_alpha=alpha, + in_gamma=gamma, + n_fft=n_fft, + device=device, + dtype=dtype, + ) + self.linear_intpl = LinearInterpolation(frame_period) + self.root_pol = PolynomialToRoots(pade_order, device=device, dtype=dtype) + + from torchlpc import sample_wise_lpc + + self.sample_wise_lpc = sample_wise_lpc + + if chunk_length is None: + self.chuking = False + else: + self.chuking = True + self.warmup_length = ( + warmup_length if warmup_length is not None else cep_order + ) + if chunk_length <= 0: + raise ValueError("chunk_length must be positive.") + if self.warmup_length < 0: + raise ValueError("warmup_length must be non-negative.") + frame_period = chunk_length - self.warmup_length + self.frame_x = Frame(chunk_length, frame_period, center=False) + self.frame_c = Frame( + cep_order * chunk_length, cep_order * frame_period, center=False + ) + + if pade_order == 4: + modified_pade_coefficients = [ + 0.4999273, + 0.1067005, + 0.01170221, + 0.0005656279, + ] + elif pade_order == 5: + modified_pade_coefficients = [ + 0.4999391, + 0.1107098, + 0.01369984, + 0.0009564853, + 0.00003041721, + ] + elif pade_order == 6: + modified_pade_coefficients = [ + 0.499962892438014, + 0.113301885013440, + 0.014990477313604, + 0.001229199693052, + 0.000059608811847, + 0.000001343163774, + ] + elif pade_order == 7: + modified_pade_coefficients = [ + 0.499969087072637, + 0.115077033090460, + 0.015876603489178, + 0.001424479579072, + 0.000083492347365, + 0.000002972456979, + 0.000000049755937, + ] + + cr = mp.taylor(mp.exp, 0, pade_order * 2) + cp, cq = mp.pade(cr, pade_order, pade_order) + cp = np.array([float(x) for x in cp]) + weights = cp[1:] / cp[:-1] + weights = np.insert(weights, 0, 1) + self.register_buffer("weights", to(weights, device=device, dtype=dtype)) + + if 4 <= pade_order <= 7: + modified_pade_coefficients.insert(0, 1) + modified_pade_coefficients = np.array(modified_pade_coefficients) + a = modified_pade_coefficients / cp + else: + a = np.ones(pade_order + 1) + a = to(a, device=device, dtype=dtype) + if learnable: + self.a = nn.Parameter(a) + else: + self.register_buffer("a", a) + + def forward(self, x: torch.Tensor, mc: torch.Tensor) -> torch.Tensor: + if x.dim() == 1: + x = x.unsqueeze(0) + mc = mc.unsqueeze(0) + unsqueezed = True + else: + unsqueezed = False + + if x.dim() != 2 or mc.dim() != 3: + raise ValueError("x and mc must be 2-D and 3-D tensors, respectively.") + + c = self.mgc2c(mc) + c0, c = remove_gain(c, value=0, return_gain=True) + c_b = self.linear_intpl(c.flip(-1)) + c_a = self.linear_intpl(c[..., 1:]) + + T = x.size(-1) + B, _, M = c_a.size() + + y = x * self.a[0] + for i in range(1, len(self.a)): + x = F.pad(x, (M, 0)) + x = x.unfold(-1, M + 1, 1) + x = (x * c_b).sum(-1) * self.weights[i] + y += x * self.a[i] + + if self.chuking: + y = F.pad(y, (self.warmup_length, 0)) + y = self.frame_x(y) + y = y.reshape(-1, y.size(-1)) + + c_a = c_a.reshape(B, -1) + c_a = F.pad(c_a, (M * self.warmup_length, 0)) + c_a = self.frame_c(c_a) + c_a = c_a.reshape(y.size(0), y.size(1), M) + + pade_coefficients = torch.cumprod(self.weights, 0) * self.a + roots = self.root_pol(pade_coefficients.flip(0)) + p = torch.reciprocal(roots) + + y = y.to(torch.complex64 if y.dtype == torch.float32 else torch.complex128) + for i in range(len(p)): + y = self.sample_wise_lpc(y, p[i] * c_a) + y = y.real + + if self.chuking: + y = y[..., self.warmup_length :] + y = y.reshape(B, -1) + y = y[..., :T] + + if not self.ignore_gain: + K = torch.exp(self.linear_intpl(c0)) + y = y * K.squeeze(-1) + + if unsqueezed: + y = y.squeeze(0) + return y diff --git a/pyproject.toml b/pyproject.toml index 8b6d867..43f625b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ classifiers = [ dependencies = [ "numpy >= 1.23.0", "scipy >= 1.12.0", + "mpmath >= 0.17.0", "tqdm >= 4.63.0", "soundfile >= 0.10.2", "torch >= 2.3.1", diff --git a/tests/test_mglsadf.py b/tests/test_mglsadf.py index 034486f..a48abfe 100644 --- a/tests/test_mglsadf.py +++ b/tests/test_mglsadf.py @@ -25,7 +25,9 @@ @pytest.mark.parametrize("ignore_gain", [False, True]) -@pytest.mark.parametrize("mode", ["multi-stage", "single-stage", "freq-domain"]) +@pytest.mark.parametrize( + "mode", ["multi-stage", "single-stage", "freq-domain", "pade-approx"] +) @pytest.mark.parametrize("c", [0, 2]) def test_compatibility( device, @@ -50,6 +52,8 @@ def test_compatibility( "fft_length": fft_length, "window": "hamming", } + elif mode == "pade-approx": + params = {"pade_order": 5, "cep_order": 100} mglsadf = diffsptk.MLSA( M, @@ -218,3 +222,49 @@ def test_mixed_phase( U.check_differentiability(device, dtype, mglsadf1, [(B, S), (B, S // P, 2 * M + 1)]) U.check_differentiability(device, dtype, mglsadf2, [(B, S), (B, S // P, 2 * M + 1)]) U.check_differentiability(device, dtype, mglsadf3, [(B, S), (B, S // P, 2 * M + 1)]) + + +@pytest.mark.parametrize("pade_order", [4, 5, 6, 7, 8]) +@pytest.mark.parametrize("chunk_length", [None, 1000]) +def test_pade_approx( + device, dtype, pade_order, chunk_length, alpha=0.42, M=24, P=80, L1=400, L2=512 +): + params = {"pade_order": pade_order, "cep_order": 100, "chunk_length": chunk_length} + + mglsadf = diffsptk.MLSA( + M, + P, + alpha=alpha, + mode="pade-approx", + device=device, + dtype=dtype, + **params, + ) + + tmp1 = "mglsadf.tmp1" + tmp2 = "mglsadf.tmp2" + T = os.path.getsize("tools/SPTK/asset/data.short") // 2 + cmd1 = f"nrand -l {T} > {tmp1}" + cmd2 = ( + f"x2x +sd tools/SPTK/asset/data.short | " + f"frame -p {P} -l {L1} | " + f"window -w 1 -n 1 -l {L1} -L {L2} | " + f"mgcep -a {alpha} -m {M} -l {L2} -E -60 > {tmp2}" + ) + U.check_compatibility( + device, + dtype, + mglsadf, + [cmd1, cmd2], + [f"cat {tmp1}", f"cat {tmp2}"], + f"mglsadf {tmp2} < {tmp1} -m {M} -p {P} -P {min(pade_order, 7)} -a {alpha}", + [f"rm {tmp1} {tmp2}"], + dx=[None, M + 1], + eq=lambda a, b: np.corrcoef(a, b)[0, 1] > 0.98, + ) + + +@pytest.mark.parametrize("mode", ["multi-stage", "pade-approx"]) +def test_learnable(mode, M=24, P=80, T=160): + mglsadf = diffsptk.MLSA(M, P, mode=mode, learnable=True) + U.check_learnable(mglsadf, [(T,), (T // P, M + 1)]) diff --git a/tests/utils.py b/tests/utils.py index 684bcec..dc86db8 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -279,7 +279,13 @@ def check_various_shape(module, shapes, *, preprocess=None): assert torch.allclose(y, target) -def check_learnable(module, shape, complex_input=False): +def check_learnable(module, shapes, complex_input=False): + x = [] + if not is_array(shapes[0]): + shapes = [shapes] + for shape in shapes: + x.append(torch.randn(*shape)) + dtype = None if complex_input: dtype = dtype_to_complex_dtype(dtype) @@ -289,8 +295,7 @@ def check_learnable(module, shape, complex_input=False): params_before.append(p.clone()) optimizer = torch.optim.SGD(module.parameters(), lr=0.01) - x = torch.randn(*shape, dtype=dtype) - y = module(x) + y = module(*x) optimizer.zero_grad() loss = y.mean() loss.backward() From 1daac86f00b8d831ab4290dc7fd3f6addc2bc842 Mon Sep 17 00:00:00 2001 From: takenori-y Date: Tue, 13 Jan 2026 13:09:09 +0900 Subject: [PATCH 2/9] fix computational instability --- diffsptk/modules/mglsadf.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/diffsptk/modules/mglsadf.py b/diffsptk/modules/mglsadf.py index 475de82..0546a97 100644 --- a/diffsptk/modules/mglsadf.py +++ b/diffsptk/modules/mglsadf.py @@ -746,7 +746,9 @@ def __init__( else: self.register_buffer("a", a) - def forward(self, x: torch.Tensor, mc: torch.Tensor) -> torch.Tensor: + def forward( + self, x: torch.Tensor, mc: torch.Tensor, return_roots: bool = True + ) -> torch.Tensor: if x.dim() == 1: x = x.unsqueeze(0) mc = mc.unsqueeze(0) @@ -783,10 +785,14 @@ def forward(self, x: torch.Tensor, mc: torch.Tensor) -> torch.Tensor: c_a = c_a.reshape(y.size(0), y.size(1), M) pade_coefficients = torch.cumprod(self.weights, 0) * self.a - roots = self.root_pol(pade_coefficients.flip(0)) - p = torch.reciprocal(roots) + pade_coefficients[0] = 1.0 + roots = self.root_pol(pade_coefficients.flip(0).double()) + roots = roots.to( + torch.complex64 if y.dtype == torch.float32 else torch.complex128 + ) - y = y.to(torch.complex64 if y.dtype == torch.float32 else torch.complex128) + y = y.to(roots.dtype) + p = torch.reciprocal(roots) for i in range(len(p)): y = self.sample_wise_lpc(y, p[i] * c_a) y = y.real @@ -802,4 +808,7 @@ def forward(self, x: torch.Tensor, mc: torch.Tensor) -> torch.Tensor: if unsqueezed: y = y.squeeze(0) + + if return_roots: + return y, roots return y From 36ee0946ccae65ca291a14d48e40ed3217ac8aed Mon Sep 17 00:00:00 2001 From: takenori-y Date: Wed, 28 Jan 2026 17:10:23 +0900 Subject: [PATCH 3/9] support separated pade coefs --- diffsptk/modules/mglsadf.py | 139 +++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 64 deletions(-) diff --git a/diffsptk/modules/mglsadf.py b/diffsptk/modules/mglsadf.py index 0546a97..4ce2082 100644 --- a/diffsptk/modules/mglsadf.py +++ b/diffsptk/modules/mglsadf.py @@ -647,6 +647,7 @@ def __init__( chunk_length: int | None = None, warmup_length: int | None = None, learnable: bool = False, + shared_a: bool = True, device: torch.device | None = None, dtype: torch.dtype | None = None, ) -> None: @@ -654,8 +655,6 @@ def __init__( if phase != "minimum": raise ValueError("Only minimum-phase filter is supported.") - if pade_order <= 3 or 15 <= pade_order: - raise ValueError("pade_order must be in [4, 14].") self.ignore_gain = ignore_gain @@ -692,41 +691,6 @@ def __init__( cep_order * chunk_length, cep_order * frame_period, center=False ) - if pade_order == 4: - modified_pade_coefficients = [ - 0.4999273, - 0.1067005, - 0.01170221, - 0.0005656279, - ] - elif pade_order == 5: - modified_pade_coefficients = [ - 0.4999391, - 0.1107098, - 0.01369984, - 0.0009564853, - 0.00003041721, - ] - elif pade_order == 6: - modified_pade_coefficients = [ - 0.499962892438014, - 0.113301885013440, - 0.014990477313604, - 0.001229199693052, - 0.000059608811847, - 0.000001343163774, - ] - elif pade_order == 7: - modified_pade_coefficients = [ - 0.499969087072637, - 0.115077033090460, - 0.015876603489178, - 0.001424479579072, - 0.000083492347365, - 0.000002972456979, - 0.000000049755937, - ] - cr = mp.taylor(mp.exp, 0, pade_order * 2) cp, cq = mp.pade(cr, pade_order, pade_order) cp = np.array([float(x) for x in cp]) @@ -734,17 +698,33 @@ def __init__( weights = np.insert(weights, 0, 1) self.register_buffer("weights", to(weights, device=device, dtype=dtype)) - if 4 <= pade_order <= 7: - modified_pade_coefficients.insert(0, 1) - modified_pade_coefficients = np.array(modified_pade_coefficients) - a = modified_pade_coefficients / cp + if pade_order == 3: + a1 = np.linspace(1.0, 0.4, pade_order + 1) + elif pade_order == 4: + a1 = np.linspace(1.0, 0.6, pade_order + 1) + elif 5 <= pade_order <= 14: + a1 = np.ones(pade_order + 1) else: - a = np.ones(pade_order + 1) - a = to(a, device=device, dtype=dtype) - if learnable: - self.a = nn.Parameter(a) + raise ValueError("pade_order must be in [3, 14].") + + if shared_a: + a1 = to(a1, device=device, dtype=dtype) + if learnable: + self.a1 = nn.Parameter(a1) + else: + self.register_buffer("a1", a1) + self.a2 = self.a1 else: - self.register_buffer("a", a) + a2 = a1 + a1 = np.ones(pade_order + 1) + a1 = to(a1, device=device, dtype=dtype) + a2 = to(a2, device=device, dtype=dtype) + if learnable: + self.a1 = nn.Parameter(a1) + self.a2 = nn.Parameter(a2) + else: + self.register_buffer("a1", a1) + self.register_buffer("a2", a2) def forward( self, x: torch.Tensor, mc: torch.Tensor, return_roots: bool = True @@ -760,19 +740,36 @@ def forward( raise ValueError("x and mc must be 2-D and 3-D tensors, respectively.") c = self.mgc2c(mc) - c0, c = remove_gain(c, value=0, return_gain=True) - c_b = self.linear_intpl(c.flip(-1)) - c_a = self.linear_intpl(c[..., 1:]) + c0, c1 = torch.split(c, [1, c.size(-1) - 1], dim=-1) + c_b = self.linear_intpl(c1.flip(-1)) + c_a = self.linear_intpl(c1) T = x.size(-1) B, _, M = c_a.size() - y = x * self.a[0] - for i in range(1, len(self.a)): + a1 = torch.clip(self.a1, min=1e-1, max=1e+1) + a1[0] = 1.0 + a2 = torch.clip(self.a2, min=1e-1, max=1e+1) + a2[0] = 1.0 + + c_b2, c_b1 = torch.split(c_b, [c_b.size(-1) - 1, 1], dim=-1) + c_b1 = c_b1.squeeze(-1) + + # Numerator, 1st stage: + y = x * a1[0] + for i in range(1, len(a1)): + x = F.pad(x[..., :-1], (1, 0)) + x = x * c_b1 * self.weights[i] + y += x * a1[i] + + # Numerator, 2nd stage: + x = y + y = x * a2[0] + for i in range(1, len(a2)): x = F.pad(x, (M, 0)) x = x.unfold(-1, M + 1, 1) - x = (x * c_b).sum(-1) * self.weights[i] - y += x * self.a[i] + x = (x[..., :-2] * c_b2).sum(-1) * self.weights[i] + y += x * a2[i] if self.chuking: y = F.pad(y, (self.warmup_length, 0)) @@ -784,18 +781,32 @@ def forward( c_a = self.frame_c(c_a) c_a = c_a.reshape(y.size(0), y.size(1), M) - pade_coefficients = torch.cumprod(self.weights, 0) * self.a - pade_coefficients[0] = 1.0 - roots = self.root_pol(pade_coefficients.flip(0).double()) - roots = roots.to( - torch.complex64 if y.dtype == torch.float32 else torch.complex128 - ) + c_a1, c_a2 = torch.split(c_a, [1, c_a.size(-1) - 1], dim=-1) + c_a2 = F.pad(c_a2, (1, 0)) - y = y.to(roots.dtype) - p = torch.reciprocal(roots) - for i in range(len(p)): - y = self.sample_wise_lpc(y, p[i] * c_a) - y = y.real + def compute_roots(a: torch.Tensor) -> torch.Tensor: + pade_coefficients = torch.cumprod(self.weights, 0) * a + roots = self.root_pol(pade_coefficients.flip(0).double()) + roots = roots.to( + torch.complex64 if a.dtype == torch.float32 else torch.complex128 + ) + return roots + + roots1 = compute_roots(a1) + roots2 = compute_roots(a2) + roots = torch.stack([roots1, roots2], dim=0) + + # Denominator, 1st stage: + y = y.to(device="cpu", dtype=roots.dtype) + p1 = torch.reciprocal(roots1) + for i in range(len(p1)): + y = self.sample_wise_lpc(y, (p1[i] * c_a1).cpu()) + + # Denominator, 2nd stage: + p2 = torch.reciprocal(roots2) + for i in range(len(p2)): + y = self.sample_wise_lpc(y, (p2[i] * c_a2).cpu()) + y = y.real.to(x.device) if self.chuking: y = y[..., self.warmup_length :] From 82b5c9da98d2f1d6ba5da8fa3cd4a5bc798a3cac Mon Sep 17 00:00:00 2001 From: takenori-y Date: Tue, 3 Feb 2026 13:04:06 +0900 Subject: [PATCH 4/9] minor fix --- diffsptk/modules/mglsadf.py | 41 +++++++++++++++---------------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/diffsptk/modules/mglsadf.py b/diffsptk/modules/mglsadf.py index 4ce2082..5c1f890 100644 --- a/diffsptk/modules/mglsadf.py +++ b/diffsptk/modules/mglsadf.py @@ -83,7 +83,8 @@ class PseudoMGLSADigitalFilter(BaseNonFunctionalModule): derived from the impulse response converted from the input mel-cepstral coefficients using FFT. 'freq-domain' performs filtering in the frequency domain rather than the time domain. 'pade-approx' implements the MLSA filter by - cascading all-zero and all-pole filters based on the factorization. + cascading all-zero and all-pole filters based on the factorization. This is slow, + but can be used to optimize the Pade approximation coefficients. n_fft : int >= 1 The number of FFT bins used for conversion. Higher values result in increased @@ -93,7 +94,7 @@ class PseudoMGLSADigitalFilter(BaseNonFunctionalModule): The order of the Taylor series expansion (valid only if **mode** is 'multi-stage'). - pade_order : int >= 4 + pade_order : int >= 3 The order of Pade approximation (valid only if **mode** is 'pade-approx'). cep_order : int >= 0 or tuple[int, int] @@ -103,14 +104,6 @@ class PseudoMGLSADigitalFilter(BaseNonFunctionalModule): ir_length : int >= 1 or tuple[int, int] The length of the impulse response (valid only if **mode** is 'single-stage'). - chunk_length : int >= 1 or None - The chunk length for processing. If None is given, chunking is not applied - (valid only if **mode** is 'pade-approx') - - warmup_length : int >= 0 or None - The warm-up length for chunk processing. This is required to reduce artifacts - caused by chunking (valid only if **mode** is 'pade-approx'). - learnable : bool If True, the polynomial coefficients used in the approximation are learnable (valid only if **mode** is 'multi-stage' or 'pade-approx'). @@ -647,13 +640,13 @@ def __init__( chunk_length: int | None = None, warmup_length: int | None = None, learnable: bool = False, - shared_a: bool = True, + per_stage_pade_coefficients: bool = False, device: torch.device | None = None, dtype: torch.dtype | None = None, ) -> None: super().__init__() - if phase != "minimum": + if phase != "minimum" or is_array_like(filter_order): raise ValueError("Only minimum-phase filter is supported.") self.ignore_gain = ignore_gain @@ -707,27 +700,27 @@ def __init__( else: raise ValueError("pade_order must be in [3, 14].") - if shared_a: + if per_stage_pade_coefficients: + a2 = a1 + a1 = np.ones(pade_order + 1) a1 = to(a1, device=device, dtype=dtype) + a2 = to(a2, device=device, dtype=dtype) if learnable: self.a1 = nn.Parameter(a1) + self.a2 = nn.Parameter(a2) else: self.register_buffer("a1", a1) - self.a2 = self.a1 + self.register_buffer("a2", a2) else: - a2 = a1 - a1 = np.ones(pade_order + 1) a1 = to(a1, device=device, dtype=dtype) - a2 = to(a2, device=device, dtype=dtype) if learnable: self.a1 = nn.Parameter(a1) - self.a2 = nn.Parameter(a2) else: self.register_buffer("a1", a1) - self.register_buffer("a2", a2) + self.a2 = self.a1 def forward( - self, x: torch.Tensor, mc: torch.Tensor, return_roots: bool = True + self, x: torch.Tensor, mc: torch.Tensor, return_roots: bool = False ) -> torch.Tensor: if x.dim() == 1: x = x.unsqueeze(0) @@ -797,16 +790,16 @@ def compute_roots(a: torch.Tensor) -> torch.Tensor: roots = torch.stack([roots1, roots2], dim=0) # Denominator, 1st stage: - y = y.to(device="cpu", dtype=roots.dtype) + y = y.to(roots.dtype) p1 = torch.reciprocal(roots1) for i in range(len(p1)): - y = self.sample_wise_lpc(y, (p1[i] * c_a1).cpu()) + y = self.sample_wise_lpc(y, (p1[i] * c_a1)) # Denominator, 2nd stage: p2 = torch.reciprocal(roots2) for i in range(len(p2)): - y = self.sample_wise_lpc(y, (p2[i] * c_a2).cpu()) - y = y.real.to(x.device) + y = self.sample_wise_lpc(y, (p2[i] * c_a2)) + y = y.real if self.chuking: y = y[..., self.warmup_length :] From d9f43cccc129a80b64f3230e3961f81b18e378f5 Mon Sep 17 00:00:00 2001 From: takenori-y Date: Thu, 26 Feb 2026 12:23:40 +0900 Subject: [PATCH 5/9] fix Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 39f73fb..ddeefac 100644 --- a/Makefile +++ b/Makefile @@ -71,7 +71,7 @@ test: tool test-example: tool [ -n "$(MODULE)" ] && module=modules/$(MODULE).py || module=; \ . .venv/bin/activate && export NUMBA_CUDA_LOW_OCCUPANCY_WARNINGS=0 && \ - python -m pytest --doctest-modules --no-cov --ignore=diffsptk/third_party diffsptk/$$module + python -m pytest --doctest-modules --no-cov --ignore=$(PROJECT)/third_party $(PROJECT)/$$module test-clean: rm -rf tests/__pycache__ From 116e8de8b7c3997b1f201a75cc01ad2f504ae534 Mon Sep 17 00:00:00 2001 From: takenori-y Date: Thu, 26 Feb 2026 12:37:39 +0900 Subject: [PATCH 6/9] make linter happy --- diffsptk/modules/acorr.py | 7 +++---- diffsptk/modules/mglsadf.py | 9 +++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/diffsptk/modules/acorr.py b/diffsptk/modules/acorr.py index 24d7565..b630053 100644 --- a/diffsptk/modules/acorr.py +++ b/diffsptk/modules/acorr.py @@ -101,10 +101,9 @@ def _precompute( elif out_format in (2, "biased"): formatter = lambda x: x / frame_length elif out_format in (3, "unbiased"): - formatter = lambda x: x / ( - torch.arange( - frame_length, frame_length - acr_order - 1, -1, device=x.device - ) + n = frame_length - acr_order - 1 + formatter = lambda x: ( + x / (torch.arange(frame_length, n, -1, device=x.device)) ) else: raise ValueError(f"out_format {out_format} is not supported.") diff --git a/diffsptk/modules/mglsadf.py b/diffsptk/modules/mglsadf.py index 5c1f890..b4bf710 100644 --- a/diffsptk/modules/mglsadf.py +++ b/diffsptk/modules/mglsadf.py @@ -83,8 +83,9 @@ class PseudoMGLSADigitalFilter(BaseNonFunctionalModule): derived from the impulse response converted from the input mel-cepstral coefficients using FFT. 'freq-domain' performs filtering in the frequency domain rather than the time domain. 'pade-approx' implements the MLSA filter by - cascading all-zero and all-pole filters based on the factorization. This is slow, - but can be used to optimize the Pade approximation coefficients. + cascading all-zero and all-pole filters derived from the factorization. While + this approach is not computationally efficient, it allows for the optimization + of the Pade approximation coefficients. n_fft : int >= 1 The number of FFT bins used for conversion. Higher values result in increased @@ -740,9 +741,9 @@ def forward( T = x.size(-1) B, _, M = c_a.size() - a1 = torch.clip(self.a1, min=1e-1, max=1e+1) + a1 = torch.clip(self.a1, min=1e-1, max=1e1) a1[0] = 1.0 - a2 = torch.clip(self.a2, min=1e-1, max=1e+1) + a2 = torch.clip(self.a2, min=1e-1, max=1e1) a2[0] = 1.0 c_b2, c_b1 = torch.split(c_b, [c_b.size(-1) - 1, 1], dim=-1) From 7401ac1db81771cda4fe67cb4e6017218b155b3a Mon Sep 17 00:00:00 2001 From: takenori-y Date: Thu, 26 Feb 2026 12:48:34 +0900 Subject: [PATCH 7/9] use round --- diffsptk/modules/mcep.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/diffsptk/modules/mcep.py b/diffsptk/modules/mcep.py index f6af827..2392dfb 100644 --- a/diffsptk/modules/mcep.py +++ b/diffsptk/modules/mcep.py @@ -99,9 +99,9 @@ def forward(self, x: torch.Tensor): ... ) >>> x = diffsptk.ramp(19) >>> mc = mcep(stft(x)) - >>> mc - tensor([[-0.8851, 0.7917, -0.1737, 0.0175], - [-0.3522, 4.4222, -1.0882, -0.0510]]) + >>> mc.round(decimals=3) + tensor([[-0.8850, 0.7920, -0.1740, 0.0170], + [-0.3520, 4.4220, -1.0880, -0.0510]]) """ check_size(x.size(-1), self.in_dim, "dimension of spectrum") From c8a48dd1cf900b2fb59eb22fc20323d54ac98bc1 Mon Sep 17 00:00:00 2001 From: takenori-y Date: Thu, 26 Feb 2026 12:55:25 +0900 Subject: [PATCH 8/9] fix test --- tests/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index dc86db8..7c916b0 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -280,15 +280,15 @@ def check_various_shape(module, shapes, *, preprocess=None): def check_learnable(module, shapes, complex_input=False): + dtype = None + if complex_input: + dtype = dtype_to_complex_dtype(dtype) + x = [] if not is_array(shapes[0]): shapes = [shapes] for shape in shapes: - x.append(torch.randn(*shape)) - - dtype = None - if complex_input: - dtype = dtype_to_complex_dtype(dtype) + x.append(torch.randn(*shape, dtype=dtype)) params_before = [] for p in module.parameters(): From 583f54730d68686cdfa4ec6fb84a4332bfbc465e Mon Sep 17 00:00:00 2001 From: takenori-y Date: Thu, 26 Feb 2026 13:14:10 +0900 Subject: [PATCH 9/9] fix coverage --- diffsptk/modules/mglsadf.py | 10 +++------- tests/test_mglsadf.py | 13 ++++++++++--- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/diffsptk/modules/mglsadf.py b/diffsptk/modules/mglsadf.py index b4bf710..85ae910 100644 --- a/diffsptk/modules/mglsadf.py +++ b/diffsptk/modules/mglsadf.py @@ -701,17 +701,13 @@ def __init__( else: raise ValueError("pade_order must be in [3, 14].") - if per_stage_pade_coefficients: + if learnable and per_stage_pade_coefficients: a2 = a1 a1 = np.ones(pade_order + 1) a1 = to(a1, device=device, dtype=dtype) a2 = to(a2, device=device, dtype=dtype) - if learnable: - self.a1 = nn.Parameter(a1) - self.a2 = nn.Parameter(a2) - else: - self.register_buffer("a1", a1) - self.register_buffer("a2", a2) + self.a1 = nn.Parameter(a1) + self.a2 = nn.Parameter(a2) else: a1 = to(a1, device=device, dtype=dtype) if learnable: diff --git a/tests/test_mglsadf.py b/tests/test_mglsadf.py index a48abfe..00371cd 100644 --- a/tests/test_mglsadf.py +++ b/tests/test_mglsadf.py @@ -264,7 +264,14 @@ def test_pade_approx( ) -@pytest.mark.parametrize("mode", ["multi-stage", "pade-approx"]) -def test_learnable(mode, M=24, P=80, T=160): - mglsadf = diffsptk.MLSA(M, P, mode=mode, learnable=True) +@pytest.mark.parametrize( + ("mode", "params"), + [ + ("multi-stage", {}), + ("pade-approx", {"per_stage_pade_coefficients": False}), + ("pade-approx", {"per_stage_pade_coefficients": True, "pade_order": 3}), + ], +) +def test_learnable(mode, params, M=24, P=80, T=160): + mglsadf = diffsptk.MLSA(M, P, mode=mode, learnable=True, **params) U.check_learnable(mglsadf, [(T,), (T // P, M + 1)])