diff --git a/FlucomaClients.cmake b/FlucomaClients.cmake
index e886c649..ec4e6fde 100644
--- a/FlucomaClients.cmake
+++ b/FlucomaClients.cmake
@@ -83,6 +83,7 @@ function(get_core_client_class name var)
   set(${var} ${client_info} PARENT_SCOPE)
 endfunction()
 #
+
 add_client(AmpFeature flucoma/clients/rt/AmpFeatureClient.hpp CLASS RTAmpFeatureClient )
 add_client(AmpGate flucoma/clients/rt/AmpGateClient.hpp CLASS RTAmpGateClient )
 add_client(AmpSlice flucoma/clients/rt/AmpSliceClient.hpp CLASS RTAmpSliceClient )
@@ -139,6 +140,8 @@ add_client(SpectralShape flucoma/clients/rt/SpectralShapeClient.hpp CLASS RTSpec
 add_kr_in_client(Stats flucoma/clients/rt/RunningStatsClient.hpp CLASS RunningStatsClient )
 add_client(TransientSlice flucoma/clients/rt/TransientSliceClient.hpp CLASS RTTransientSliceClient )
 add_client(Transients flucoma/clients/rt/TransientClient.hpp CLASS RTTransientClient )
+add_kr_in_client(VoiceAllocator flucoma/clients/rt/VoiceAllocatorClient.hpp CLASS VoiceAllocatorClient )
+
 
 #lib manipulation client group
 add_client(DataSet flucoma/clients/nrt/DataSetClient.hpp CLASS NRTThreadedDataSetClient GROUP MANIPULATION)
diff --git a/include/flucoma/algorithms/public/VoiceAllocator.hpp b/include/flucoma/algorithms/public/VoiceAllocator.hpp
new file mode 100644
index 00000000..93c99b98
--- /dev/null
+++ b/include/flucoma/algorithms/public/VoiceAllocator.hpp
@@ -0,0 +1,181 @@
+/*
+Part of the Fluid Corpus Manipulation Project (http://www.flucoma.org/)
+Copyright University of Huddersfield.
+Licensed under the BSD-3 License.
+See license.md file in the project root for full license information.
+This project has received funding from the European Research Council (ERC)
+under the European Union’s Horizon 2020 research and innovation programme
+(grant agreement No 725899).
+*/
+
+#pragma once
+
+#include "../util/PartialTracking.hpp"
+
+namespace fluid {
+namespace algorithm {
+
+/// Maps the partial tracks reported by PartialTracking onto a fixed pool of
+/// voice slots, tagging every slot with a VoiceState for each frame.
+class VoiceAllocator
+{
+  template
+  using vector = rt::vector;
+
+public:
+  /// Stores the voice count and hands `alloc` to the internal containers;
+  /// init() must still be called before processFrame().
+  VoiceAllocator(index nVoices, Allocator& alloc)
+      : mTracking(alloc), mVoices{nVoices}, mFreeVoices(alloc),
+        mActiveVoices(alloc), mActiveVoiceData(0, alloc)
+  {}
+
+  /// Rebuilds the pool for `nVoices` slots: empties the active list, refills
+  /// the free queue with 0..nVoices-1, zeroes every slot and restarts the
+  /// tracker. `alloc` is accepted but not used here.
+  void init(index nVoices, Allocator& alloc)
+  {
+    mVoices = nVoices;
+    while (!mActiveVoices.empty()) { mActiveVoices.pop_back(); }
+    while (!mFreeVoices.empty()) { mFreeVoices.pop(); }
+    for (index i = 0; i < nVoices; ++i) { mFreeVoices.push(i); }
+    mActiveVoiceData.resize(nVoices);
+    // iterate by reference: a by-value loop variable would leave slots that
+    // survived the resize holding their previous contents
+    for (VoicePeak& each : mActiveVoiceData) { each = {0, 0, 0}; }
+    mTracking.init();
+    mInitialized = true;
+  }
+
+  /// Tracks `incomingVoices` for one frame, then writes the per-slot voice
+  /// data (one entry per voice slot) into `outgoingVoices`.
+  /// `sortMethod` picks which tracks win when there are more tracks than
+  /// voices: 0 = lowest frequency first, 1 = largest magnitude first.
+  void processFrame(vector incomingVoices,
+                    vector& outgoingVoices, index minTrackLen,
+                    double birthLowTreshold, double birthHighTreshold,
+                    index trackMethod, double trackMagRange,
+                    double trackFreqRange, double trackProb, index sortMethod,
+                    Allocator& alloc)
+  {
+    assert(mInitialized);
+
+    double maxAmp = -144;
+    for (const SinePeak& voice : incomingVoices)
+    {
+      if (voice.logMag > maxAmp) { maxAmp = voice.logMag; }
+    }
+
+    mTracking.processFrame(incomingVoices, maxAmp, minTrackLen,
+                           birthLowTreshold, birthHighTreshold, trackMethod,
+                           trackMagRange, trackFreqRange, trackProb, alloc);
+
+    outgoingVoices = mTracking.getActiveVoices(alloc);
+    outgoingVoices = sortVoices(outgoingVoices, sortMethod);
+    if (outgoingVoices.size() > mVoices) outgoingVoices.resize(mVoices);
+    outgoingVoices = assignVoices(outgoingVoices, alloc);
+
+    mTracking.prune();
+  }
+
+  void reset() { mInitialized = false; }
+
+  bool initialized() const { return mInitialized; }
+
+private:
+  /// Sorts `incomingVoices` in place (0: ascending freq, 1: descending
+  /// logMag, anything else: untouched) and returns a copy of the result.
+  vector sortVoices(vector& incomingVoices,
+                    index sortingMethod)
+  {
+    switch (sortingMethod)
+    {
+    case 0: // lowest
+      std::sort(incomingVoices.begin(), incomingVoices.end(),
+                [](const VoicePeak& voice1, const VoicePeak& voice2) {
+                  return voice1.freq < voice2.freq;
+                });
+      break;
+    case 1: // loudest
+      std::sort(incomingVoices.begin(), incomingVoices.end(),
+                [](const VoicePeak& voice1, const VoicePeak& voice2) {
+                  return voice1.logMag > voice2.logMag;
+                });
+      break;
+    }
+    return incomingVoices;
+  }
+
+  /// Reconciles the tracked voices with the slot pool and returns a copy of
+  /// the slot data. Matched entries are erased from `incomingVoices`; `alloc`
+  /// is accepted but not used here.
+  vector assignVoices(vector& incomingVoices,
+                      Allocator& alloc)
+  {
+    // move released to free
+    for (index existing = 0; existing < mActiveVoiceData.size(); ++existing)
+    {
+      if (mActiveVoiceData[existing].state ==
+          algorithm::VoiceState::kReleaseState)
+        mActiveVoiceData[existing].state = algorithm::VoiceState::kFreeState;
+    }
+
+    // handle existing voices - killing or sustaining
+    for (index existing = 0; existing < mActiveVoices.size(); ++existing)
+    {
+      bool killVoice = true;
+      for (index incoming = 0; incoming < incomingVoices.size(); ++incoming)
+      {
+        // remove incoming voice events & allows corresponding voice to live if
+        // it already exists
+        if (mActiveVoiceData[mActiveVoices[existing]].voiceID ==
+            incomingVoices[incoming].voiceID)
+        {
+          killVoice = false;
+          mActiveVoiceData[mActiveVoices[existing]] =
+              incomingVoices[incoming]; // update freq/mag
+          mActiveVoiceData[mActiveVoices[existing]].state =
+              algorithm::VoiceState::kSustainState;
+          incomingVoices.erase(incomingVoices.begin() + incoming);
+          break;
+        }
+      }
+      if (killVoice) // voice off
+      {
+        mActiveVoiceData[mActiveVoices[existing]].state =
+            algorithm::VoiceState::kReleaseState;
+        mFreeVoices.push(mActiveVoices[existing]);
+        mActiveVoices.erase(mActiveVoices.begin() + existing);
+        --existing;
+      }
+    }
+
+    // handle new voice allocation
+    for (index incoming = 0; incoming < incomingVoices.size(); ++incoming)
+    {
+      if (!mFreeVoices.empty()) // voice on
+      {
+        index newVoiceIndex = mFreeVoices.front();
+        mFreeVoices.pop();
+        mActiveVoices.push_back(newVoiceIndex);
+        algorithm::VoiceState prevState = mActiveVoiceData[newVoiceIndex].state;
+        mActiveVoiceData[newVoiceIndex] = incomingVoices[incoming];
+        if (prevState == algorithm::VoiceState::kReleaseState) // mark as stolen
+          mActiveVoiceData[newVoiceIndex].state =
+              algorithm::VoiceState::kStolenState;
+      }
+    }
+
+    return mActiveVoiceData;
+  }
+
+  PartialTracking mTracking;
+  index mVoices;
+  rt::queue mFreeVoices;
+  rt::deque mActiveVoices;
+  vector mActiveVoiceData;
+
+  bool mInitialized{false};
+};
+} // namespace algorithm
+} // namespace fluid
diff --git a/include/flucoma/algorithms/util/PartialTracking.hpp b/include/flucoma/algorithms/util/PartialTracking.hpp
index a4ac7926..feb61317 100644
--- a/include/flucoma/algorithms/util/PartialTracking.hpp
+++ b/include/flucoma/algorithms/util/PartialTracking.hpp
@@ -25,6 +25,14 @@ Capability through Linear Programming". Proceedings of DAFx-2018.
 namespace fluid {
 namespace algorithm {
 
+enum class VoiceState {
+  kFreeState,    // slot idle
+  kAttackState,  // track newly reported / newly given a slot
+  kSustainState, // track continuing in the slot it already held
+  kReleaseState, // track ended this frame
+  kStolenState   // slot released and re-assigned within the same frame
+};
+
 struct SinePeak
 {
   double freq;
@@ -32,15 +40,23 @@ struct SinePeak
   bool assigned;
 };
 
-struct SineTrack
+struct VoicePeak
 {
+  double freq;
+  double logMag;
+  index voiceID;
+  VoiceState state;
+};
 
+struct SineTrack
+{
   SineTrack(Allocator& alloc) : peaks(alloc) {}
 
   SineTrack(rt::vector&& p, index s, index e, bool a, bool ass, index t)
-      : peaks{p}, startFrame{s}, endFrame{e}, active{a}, assigned{ass}, trackId{
-          t}
+      : peaks{p}, startFrame{s}, endFrame{e}, active{a}, assigned{ass},
+        trackId{t}
+
   {}
 
   rt::vector peaks;
@@ -134,6 +150,32 @@ class PartialTracking
     return sinePeaks;
   }
 
+  // todo - refactor this function with the one above
+  /// Returns one VoicePeak per track that covers the latency frame
+  /// (current frame minus the minimum track length). Note the `logMag` field
+  /// is filled with a linear magnitude, 10^(logMag / 20), not a dB value.
+  vector getActiveVoices(Allocator& alloc)
+  {
+    vector voicePeaks(0, alloc);
+    index latencyFrame = mCurrentFrame - mMinTrackLength;
+    if (latencyFrame < 0) return voicePeaks;
+    for (auto&& track : mTracks)
+    {
+      if (track.startFrame > latencyFrame) continue;
+      if (track.endFrame >= 0 && track.endFrame <= latencyFrame) continue;
+      if (track.endFrame >= 0 &&
+          track.endFrame - track.startFrame < mMinTrackLength)
+        continue;
+      voicePeaks.push_back(
+          {track.peaks[asUnsigned(latencyFrame - track.startFrame)].freq,
+           pow(10,
+               track.peaks[asUnsigned(latencyFrame - track.startFrame)].logMag /
+                   20),
+           track.trackId, VoiceState::kAttackState});
+    }
+    return voicePeaks;
+  }
+
 private:
   void updateVariances()
   {
diff --git a/include/flucoma/clients/common/FluidNRTClientWrapper.hpp b/include/flucoma/clients/common/FluidNRTClientWrapper.hpp
index 25e58f1c..5524cef7 100644
--- a/include/flucoma/clients/common/FluidNRTClientWrapper.hpp
+++ b/include/flucoma/clients/common/FluidNRTClientWrapper.hpp
@@ -117,6 +117,18 @@ struct IsControlOut>
   constexpr static bool value{std::is_base_of::value};
 };
 
+template
+struct IsControlIn
+{
+  constexpr static bool value = std::is_base_of::value;
+};
+
+template
+struct IsControlIn>
+{
+  constexpr static bool value{std::is_base_of::value};
+};
+
 template
 struct AddPadding
@@ -126,18 +138,13 @@ struct AddPadding
   static constexpr bool HasFFT =
       impl::FilterTupleIndices::type::size() > 0;
   static constexpr bool HasControlOut = IsControlOut::value;
-  // static constexpr size_t value = HasControlOut? 2 : 1;
+  static constexpr bool HasControlIn = IsControlIn::value;
 
   static constexpr size_t value = HasFFT && HasControlOut ? 2
                                   : HasFFT && !HasControlOut ? 1
-                                  : !HasFFT && HasControlOut ? 3
-                                  : 0;
-
-
-  // static constexpr size_t value = std::conditional_t,
-  // std::integral_constant>,
-  // std::integral_constant>()();
+                                  : !HasFFT && HasControlOut && !HasControlIn
+                                      ? 3
+                                      : 0;
 };
 
 // Special case for Loudness :`-(
@@ -169,7 +176,7 @@ class NRTClientWrapper : public OfflineIn, public OfflineOut
   static constexpr auto isControl =
       std::is_same, StreamingControl>();
-  
+
   using ParamDescType = ParamType;
   using ParamSetType = ParameterSet;
   using ParamSetViewType = ParameterSetView;
@@ -219,8 +226,9 @@ class NRTClientWrapper : public OfflineIn, public OfflineOut
 
 
   NRTClientWrapper(NRTClientWrapper&& x)
-      : mParams{std::move(x.mParams)},
-        mNRTContext{std::move(x.mNRTContext)}, mClient{std::move(x.mClient)}
+      : mParams{std::move(x.mParams)}, mNRTContext{std::move(x.mNRTContext)},
+        mClient{std::move(x.mClient)}
+
   {
     mRealTimeParams =
         RTParamSetViewType(RTClient::getParameterDescriptors(),
@@ -581,12 +589,14 @@ struct StreamingControl
     std::fill_n(std::back_inserter(inputData), inputBuffers.size(),
                 HostMatrix(nChans, paddedLength));
 
-    std::vector outputData;
+    std::vector outputData;
+
     outputData.reserve(outputBuffers.size());
     std::fill_n(std::back_inserter(outputData), outputBuffers.size(),
                 HostMatrix(nChans * maxFeatures, nAnalysisFrames));
 
-    double sampleRate{0};
+    double sampleRate{0};
+
     // Copy input data
     for (index i = 0; i < nChans; ++i)
     {
@@ -619,10 +629,10 @@ struct StreamingControl
                 inputData[asUnsigned(k)].row(i)(Slice(t, controlRate)));
           }
 
-          for(auto& out: outputData)
+          for (auto& out : outputData)
           {
-            outputs.push_back(
-                out.col(j)(Slice(i * maxFeatures, maxFeatures)));
+            outputs.push_back(out.col(j)(Slice(i * maxFeatures, maxFeatures)));
+
           }
 
           client.process(inputs, outputs, c);
@@ -650,11 +660,95 @@ struct StreamingControl
       for (index i = 0; i < nFeatures; ++i)
       {
         for (index j = 0; j < nChans; ++j)
-          thisOutput.samps(i + j * nFeatures) <<=
-            outs.second->row(i + j * maxFeatures)(Slice(latencyHops, keepHops));
+          thisOutput.samps(i + j * nFeatures) <<= outs.second->row(
+              i + j * maxFeatures)(Slice(latencyHops, keepHops));
+      }
+    }
+
+    return {};
+  }
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+template
+struct ControlControl // drives the client once per frame, column by column
+{
+  template
+  static Result process(Client& client, InputList& inputBuffers,
+                        OutputList& outputBuffers, index nFrames, index nChans,
+                        std::pair userPadding, FluidContext& c)
+  {
+    // To account for process latency we need to copy the buffers with padding
+    std::vector inputData;
+    index maxFeatures = client.maxControlChannelsOut();
+
+    inputData.reserve(inputBuffers.size());
+
+    index startPadding = client.latency() + userPadding.first;
+    index totalPadding = startPadding + userPadding.second;
+
+    index paddedLength = nFrames + totalPadding;
+
+    std::fill_n(std::back_inserter(inputData), inputBuffers.size(),
+                HostMatrix(nChans, paddedLength));
+
+    std::vector outputData;
+    outputData.reserve(outputBuffers.size());
+    std::fill_n(
+        std::back_inserter(outputData), outputBuffers.size(),
+        HostMatrix(nChans * maxFeatures,
+                   paddedLength)); // TODO: check padded behaviour for output
+
+    double sampleRate{0};
+
+    // Copy input data (strangely by time series so we have to iterate later)
+    for (index i = 0; i < nChans; ++i)
+    {
+      for (index j = 0; j < asSigned(inputBuffers.size()); ++j)
+      {
+        BufferAdaptor::ReadAccess thisInput(inputBuffers[asUnsigned(j)].buffer);
+        if (i == 0 && j == 0) sampleRate = thisInput.sampleRate();
+        inputData[asUnsigned(j)].row(i)(Slice(userPadding.first, nFrames)) <<=
+            thisInput.samps(inputBuffers[asUnsigned(j)].startFrame, nFrames,
+                            inputBuffers[asUnsigned(j)].startChan + i);
       }
     }
-    
+
+    std::vector inputs(inputBuffers.size(), {nullptr, 0, 0});
+    std::vector outputs(outputBuffers.size(), {nullptr, 0, 0});
+
+    FluidTask* task = c.task();
+
+    // run the algorithm
+    client.reset(c);
+
+    for (index i = 0; i < nFrames; ++i) // iterate each frame as time series
+    {
+      for (std::size_t j = 0; j < inputBuffers.size(); ++j)
+        inputs[j] = inputData[j].col(i);
+      for (std::size_t j = 0; j < outputBuffers.size(); ++j)
+        outputs[j] = outputData[j].col(i);
+
+      client.process(inputs, outputs, c);
+
+      if (task && !task->processUpdate(static_cast(i),
+                                       static_cast(nFrames)))
+        break;
+    }
+
+    // copy to outbuf
+    for (index i = 0; i < asSigned(outputBuffers.size()); ++i)
+    {
+      if (!outputBuffers[asUnsigned(i)]) continue;
+      BufferAdaptor::Access thisOutput(outputBuffers[asUnsigned(i)]);
+      Result r = thisOutput.resize(nFrames, nChans, sampleRate);
+      if (!r.ok()) return r;
+      for (index j = 0; j < nChans; ++j)
+        thisOutput.samps(j) <<=
+            outputData[asUnsigned(i)].row(j)(Slice(startPadding, nFrames));
+    }
+
+    return {};
   }
 };
 
@@ -740,6 +834,11 @@ using NRTControlAdaptor =
     impl::NRTClientWrapper;
 
+template
+using NRTDualControlAdaptor =
+    impl::NRTClientWrapper;
+
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -925,10 +1024,8 @@ class NRTThreadingAdaptor : public OfflineIn, public OfflineOut
         state = kDoneStillProcessing;
         mThreadedTask->mState = kDoneStillProcessing;
       }
-      else
-      {
-        mThreadedTask = nullptr;
-      }
+      else { mThreadedTask = nullptr; }
+
     }
 
     return state;
@@ -1026,8 +1123,9 @@ class NRTThreadingAdaptor : public OfflineIn, public OfflineOut
   };
 
   ThreadedTask(ClientPointer client, NRTJob& job, bool synchronous)
-      : mProcessParams(job.mParams), mState(kNoProcess),
-        mClient(client), mContext{mTask}, mCallback{job.mCallback}
+      : mProcessParams(job.mParams), mState(kNoProcess), mClient(client),
+        mContext{mTask}, mCallback{job.mCallback}
+
   {
     assert(mClient.get() != nullptr); // right?
 
diff --git a/include/flucoma/clients/rt/VoiceAllocatorClient.hpp b/include/flucoma/clients/rt/VoiceAllocatorClient.hpp
new file mode 100644
index 00000000..21eb50d4
--- /dev/null
+++ b/include/flucoma/clients/rt/VoiceAllocatorClient.hpp
@@ -0,0 +1,181 @@
+/*
+Part of the Fluid Corpus Manipulation Project (http://www.flucoma.org/)
+Copyright 2017-2019 University of Huddersfield.
+Licensed under the BSD-3 License.
+See license.md file in the project root for full license information.
+This project has received funding from the European Research Council (ERC)
+under the European Union’s Horizon 2020 research and innovation programme
+(grant agreement No 725899).
+*/
+
+#pragma once
+
+#include "../common/AudioClient.hpp"
+#include "../common/FluidBaseClient.hpp"
+#include "../common/FluidNRTClientWrapper.hpp"
+#include "../common/FluidSource.hpp"
+#include "../common/ParameterConstraints.hpp"
+#include "../common/ParameterSet.hpp"
+#include "../common/ParameterTypes.hpp"
+#include "../../algorithms/public/VoiceAllocator.hpp"
+#include "../../data/TensorTypes.hpp"
+
+namespace fluid {
+namespace client {
+namespace voiceallocator {
+
+template
+using HostVector = FluidTensorView;
+
+enum VoiceAllocatorParamIndex {
+  kNVoices,
+  kPrioritisedVoices,
+  kBirthLowThreshold,
+  kBirthHighTreshold,
+  kMinTrackLen,
+  kTrackMagRange,
+  kTrackFreqRange,
+  kTrackProb
+};
+
+constexpr auto VoiceAllocatorParams = defineParameters(
+    LongParamRuntimeMax("numVoices", "Number of Voices", 1, Min(1)),
+    EnumParam("prioritisedVoices", "Prioritised Voice Quality", 0,
+              "Lowest Frequency", "Loudest Magnitude"),
+    FloatParam("birthLowThreshold", "Track Birth Low Frequency Threshold", -24,
+               Min(-144), Max(0)),
+    FloatParam("birthHighThreshold", "Track Birth High Frequency Threshold",
+               -60, Min(-144), Max(0)),
+    LongParam("minTrackLen", "Minimum Track Length", 1, Min(1)),
+    FloatParam("trackMagRange", "Tracking Magnitude Range (dB)", 15., Min(1.),
+               Max(200.)),
+    FloatParam("trackFreqRange", "Tracking Frequency Range (Hz)", 50., Min(1.),
+               Max(10000.)),
+    FloatParam("trackProb", "Tracking Matching Probability", 0.5, Min(0.0),
+               Max(1.0)));
+
+/// Control-rate client: takes a frequency list and a magnitude list and
+/// emits per-voice frequencies, magnitudes and states.
+class VoiceAllocatorClient : public FluidBaseClient,
+                             public ControlIn,
+                             ControlOut
+{
+  using VoicePeak = algorithm::VoicePeak;
+  using SinePeak = algorithm::SinePeak;
+
+public:
+  using ParamDescType = decltype(VoiceAllocatorParams);
+
+  using ParamSetViewType = ParameterSetView;
+  std::reference_wrapper mParams;
+
+  void setParams(ParamSetViewType& p) { mParams = p; }
+
+  template
+  auto& get() const
+  {
+    return mParams.get().template get();
+  }
+
+  static constexpr auto& getParameterDescriptors()
+  {
+    return VoiceAllocatorParams;
+  }
+
+  VoiceAllocatorClient(ParamSetViewType& p, FluidContext& c)
+      : mParams(p), mVoiceAllocator(get().max(), c.allocator()),
+        mSizeTracker{0}
+  {
+    controlChannelsIn(2);
+    controlChannelsOut({3, get(), get().max()});
+    setInputLabels({"frequencies", "magnitudes"});
+    setOutputLabels({"frequencies", "magnitudes", "states"});
+    mVoiceAllocator.init(get(), c.allocator());
+  }
+
+  /// Converts the non-zero input pairs to SinePeaks (magnitude in dB), runs
+  /// the allocator for one frame and writes one row per voice to the three
+  /// outputs. Rebuilds the allocator first if it is uninitialised or the
+  /// tracked size parameter has changed.
+  template
+  void process(std::vector>& input,
+               std::vector>& output, FluidContext& c)
+  {
+    if (!input[0].data()) return;
+    if (!output[0].data() && !output[1].data()) return;
+    if (!mVoiceAllocator.initialized() || mSizeTracker.changed(get()))
+    {
+      controlChannelsOut({3, get()}); // update the dynamic out size
+      mVoiceAllocator.init(get(), c.allocator());
+    }
+
+    rt::vector incomingVoices(0, c.allocator());
+    rt::vector outgoingVoices(0, c.allocator());
+
+    for (index i = 0; i < input[0].size(); ++i)
+    {
+      if (input[1].row(i) != 0 && input[0].row(i) != 0)
+      {
+        double logMag =
+            20 * log10(std::max(static_cast(input[1].row(i)),
+                                algorithm::epsilon));
+        incomingVoices.push_back({input[0].row(i), logMag, false});
+      }
+    }
+
+    mVoiceAllocator.processFrame(
+        incomingVoices, outgoingVoices, get(),
+        get(), get(), 0,
+        get(), get(), get(),
+        get(), c.allocator());
+
+    for (index i = 0; i < static_cast(get()); ++i)
+    {
+      output[2].row(i) = static_cast(outgoingVoices[i].state);
+      output[1].row(i) = outgoingVoices[i].logMag;
+      output[0].row(i) = outgoingVoices[i].freq;
+    }
+  }
+
+  /// Marks the allocator uninitialised so the next process() call rebuilds it.
+  /// Always returns an empty (default) MessageResult.
+  MessageResult clear()
+  {
+    mVoiceAllocator.reset();
+    return {};
+  }
+
+  void reset(FluidContext&) { clear(); }
+
+  static auto getMessageDescriptors()
+  {
+    return defineMessages(makeMessage("clear", &VoiceAllocatorClient::clear));
+  }
+
+  index latency() const { return 0; }
+
+private:
+  algorithm::VoiceAllocator mVoiceAllocator;
+  ParameterTrackChanges mSizeTracker;
+};
+
+} // namespace voiceallocator
+
+using VoiceAllocatorClient =
+    ClientWrapper;
+
+auto constexpr NRTVoiceAllocatorParams =
+    makeNRTParams(
+        InputBufferParam("frequencies", "Source F Buffer"),
+        InputBufferParam("magnitudes", "Source M Buffer"),
+        BufferParam("freqed", "dest f Buffer"),
+        BufferParam("magned", "dest m Buffer"),
+        BufferParam("voiced", "dest v Buffer"));
+
+using NRTVoiceAllocatorClient =
+    NRTDualControlAdaptor;
+
+using NRTThreadedVoiceAllocatorClient =
+    NRTThreadingAdaptor;
+} // namespace client
+} // namespace fluid