From 367cc930e339aa01bcf7b35b8c148400613cf004 Mon Sep 17 00:00:00 2001 From: tremblap Date: Wed, 19 Oct 2022 15:13:55 +0100 Subject: [PATCH 1/8] working placeholders --- doc/SineFeature.rst | 82 +++++++++++++++++++++++++++++++++ example-code/sc/SineFeature.scd | 55 ++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 doc/SineFeature.rst create mode 100644 example-code/sc/SineFeature.scd diff --git a/doc/SineFeature.rst b/doc/SineFeature.rst new file mode 100644 index 00000000..47ce0800 --- /dev/null +++ b/doc/SineFeature.rst @@ -0,0 +1,82 @@ +:digest: Sinusoidal Modelling and Resynthesis +:species: transformer +:sc-categories: Libraries>FluidDecomposition +:sc-related: Guides/FluidCorpusManipulation, Classes/SinOsc +:see-also: Sines +:description: Sinusoidal Modelling process on its audio input. +:discussion: + It implements a mix of algorithms taken from classic papers. + + The algorithm will take an audio in, and will divide it in two parts: + * a reconstruction of what it detects as sinusoidal; + * a residual derived from the previous signal to allow null-summing + + The whole process is based on the assumption that signal is made of pitched steady components that have a long-enough duration and are periodic enough to be perceived as such, that can be tracked, resynthesised and removed from the original, leaving behind what is considered as non-pitched, noisy, and/or transient. It first tracks the peaks, then checks if they are the continuation of a peak in previous spectral frames, by assigning them a track. + +:process: The audio rate version of the object. +:output: An array of two audio streams: [0] is the harmonic part extracted, [1] is the rest. The latency between the input and the output is (( hopSize * minTrackLen) + windowSize) samples. + + +:control in: + + The input to be processed + +:control bandwidth: + + The number of bins used to resynthesise a peak. 
It has an effect on CPU cost: the widest is more accurate but more computationally expensive. It is capped at (fftSize / 2) + 1. + +:control numPeaks: + + The number of bins used to resynthesise a peak. It has an effect on CPU cost: the widest is more accurate but more computationally expensive. It is capped at (fftSize / 2) + 1. + +:control detectionThreshold: + + The threshold in dB above which a magnitude peak is considered to be a sinusoidal component. + +:control birthLowThreshold: + + The threshold in dB above which to consider a peak to start a sinusoidal component tracking, for the low end of the spectrum. It is interpolated across the spectrum until birthHighThreshold at half-Nyquist. + +:control birthHighThreshold: + + The threshold in dB above which to consider a peak to start a sinusoidal component tracking, for the high end of the spectrum. It is interpolated across the spectrum until birthLowThreshold at DC. + +:control minTrackLen: + + The minimum duration, in spectral frames, for a sinusoidal track to be accepted as a partial. It allows to remove bubbly pitchy artefacts, but is more CPU intensive and might reject quick pitch material. + +:control trackingMethod: + + The algorithm used to track the sinusoidal continuity between spectral frames. 0 is the default, "Greedy", and 1 is a more expensive [^"Hungarian"]( Neri, J., and Depalle, P., "Fast Partial Tracking of Audio with Real-Time Capability through Linear Programming". Proceedings of DAFx-2018. ) one. + +:control trackMagRange: + + The amplitude difference allowed for a track to diverge between frames, in dB. + +:control trackFreqRange: + + The frequency difference allowed for a track to diverge between frames, in Hertz. + +:control trackProb: + + The probability of the tracking algorithm to find a track. + +:control windowSize: + + The window size. As sinusoidal estimation relies on spectral frames, we need to decide what precision we give it spectrally and temporally. 
For more information visit https://learn.flucoma.org/learn/fourier-transform/ + +:control hopSize: + + The window hop size. As sinusoidal estimation relies on spectral frames, we need to move the window forward. It can be any size, but low overlap will create audible artefacts. The -1 default value will default to half of windowSize (overlap of 2). + +:control fftSize: + + The inner FFT/IFFT size. It should be at least 4 samples long, at least the size of the window, and a power of 2. Making it larger allows an oversampling of the spectral precision. The -1 default value will default to windowSize. The -1 default value will default to the highest of windowSize and (bandwidth - 1) * 2. + +:control maxFFTSize: + + How large can the FFT be, by allocating memory at instantiation time. This cannot be modulated. + +:control maxNumPeaks: + + How large can the FFT be, by allocating memory at instantiation time. This cannot be modulated. diff --git a/example-code/sc/SineFeature.scd b/example-code/sc/SineFeature.scd new file mode 100644 index 00000000..43735fac --- /dev/null +++ b/example-code/sc/SineFeature.scd @@ -0,0 +1,55 @@ + +CODE:: + +~src = Buffer.readChannel(s,FluidFilesPath("Tremblay-BeatRemember.wav"),channels:[0]); + +( +~synth = { + arg which = 0, detectionThreshold = -96, minTrackLen = 15; + var src = PlayBuf.ar(1,~src,BufRateScale.ir(~src),loop:1); + var sines, residual; + # sines, residual = FluidSines.ar(src,detectionThreshold:detectionThreshold,minTrackLen:minTrackLen); + Select.ar(which,[sines,residual]).dup; +}.play; +) + +~synth.set(\which,1) // residual +~synth.set(\which,0) // back to sinusoids + +// try some different parameters + +// tracks can be short but the detection threshold is higher than the default +~synth.set(\detectionThreshold,-40,\minTrackLen,1) + +// increase the minimum track length +~synth.set(\detectionThreshold,-40,\minTrackLen,15) + +// lower the threshold but increase the track length drastically 
+~synth.set(\detectionThreshold,-80,\minTrackLen,50) + +// increase the threshold drastically but lower the minimum track length +~synth.set(\detectionThreshold,-24,\minTrackLen,1) +:: +strong::a little more explanation:: +With these settings everything in the sound is considered a sinusoid, even short and quiet peaks. + +Because the decomposition is a windowed process, the detected sinusoidal peaks are located in time based on the window of analysis. When the oscillator changes (even slowly) over time we hear the artefact in the residual output. +code:: + +( +~synth = { + arg which = 0; + var stable = SinOsc.ar(69.midicps,0,0.1); + var oscillating = SinOsc.ar(SinOsc.kr(0.1,0,12,57).midicps,0,0.1); + var sig = SelectX.ar(which.lag(0.1),[stable,oscillating]); + var sines, residual; + # sines, residual = FluidSines.ar(sig,76,-144,-144,-144,1,0,200,1000,0); + [sines, residual * ((which*40) + 1).lag(0.1)] +}.play +) + +~synth.set(\which,1); + +~synth.set(\which,0); + +:: \ No newline at end of file From f36ddb22b9efb539a6d0c4e6589d4a5e7bddf95c Mon Sep 17 00:00:00 2001 From: tremblap Date: Sat, 22 Oct 2022 10:19:44 +0100 Subject: [PATCH 2/8] placeholders for *sineFeature --- doc/BufSineFeature.rst | 82 ++++++++++++++++++++++++++++++ example-code/sc/BufSineFeature.scd | 79 ++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 doc/BufSineFeature.rst create mode 100644 example-code/sc/BufSineFeature.scd diff --git a/doc/BufSineFeature.rst b/doc/BufSineFeature.rst new file mode 100644 index 00000000..47ce0800 --- /dev/null +++ b/doc/BufSineFeature.rst @@ -0,0 +1,82 @@ +:digest: Sinusoidal Modelling and Resynthesis +:species: transformer +:sc-categories: Libraries>FluidDecomposition +:sc-related: Guides/FluidCorpusManipulation, Classes/SinOsc +:see-also: Sines +:description: Sinusoidal Modelling process on its audio input. +:discussion: + It implements a mix of algorithms taken from classic papers. 
+ + The algorithm will take an audio in, and will divide it in two parts: + * a reconstruction of what it detects as sinusoidal; + * a residual derived from the previous signal to allow null-summing + + The whole process is based on the assumption that signal is made of pitched steady components that have a long-enough duration and are periodic enough to be perceived as such, that can be tracked, resynthesised and removed from the original, leaving behind what is considered as non-pitched, noisy, and/or transient. It first tracks the peaks, then checks if they are the continuation of a peak in previous spectral frames, by assigning them a track. + +:process: The audio rate version of the object. +:output: An array of two audio streams: [0] is the harmonic part extracted, [1] is the rest. The latency between the input and the output is (( hopSize * minTrackLen) + windowSize) samples. + + +:control in: + + The input to be processed + +:control bandwidth: + + The number of bins used to resynthesise a peak. It has an effect on CPU cost: the widest is more accurate but more computationally expensive. It is capped at (fftSize / 2) + 1. + +:control numPeaks: + + The number of bins used to resynthesise a peak. It has an effect on CPU cost: the widest is more accurate but more computationally expensive. It is capped at (fftSize / 2) + 1. + +:control detectionThreshold: + + The threshold in dB above which a magnitude peak is considered to be a sinusoidal component. + +:control birthLowThreshold: + + The threshold in dB above which to consider a peak to start a sinusoidal component tracking, for the low end of the spectrum. It is interpolated across the spectrum until birthHighThreshold at half-Nyquist. + +:control birthHighThreshold: + + The threshold in dB above which to consider a peak to start a sinusoidal component tracking, for the high end of the spectrum. It is interpolated across the spectrum until birthLowThreshold at DC. 
+ +:control minTrackLen: + + The minimum duration, in spectral frames, for a sinusoidal track to be accepted as a partial. It allows to remove bubbly pitchy artefacts, but is more CPU intensive and might reject quick pitch material. + +:control trackingMethod: + + The algorithm used to track the sinusoidal continuity between spectral frames. 0 is the default, "Greedy", and 1 is a more expensive [^"Hungarian"]( Neri, J., and Depalle, P., "Fast Partial Tracking of Audio with Real-Time Capability through Linear Programming". Proceedings of DAFx-2018. ) one. + +:control trackMagRange: + + The amplitude difference allowed for a track to diverge between frames, in dB. + +:control trackFreqRange: + + The frequency difference allowed for a track to diverge between frames, in Hertz. + +:control trackProb: + + The probability of the tracking algorithm to find a track. + +:control windowSize: + + The window size. As sinusoidal estimation relies on spectral frames, we need to decide what precision we give it spectrally and temporally. For more information visit https://learn.flucoma.org/learn/fourier-transform/ + +:control hopSize: + + The window hop size. As sinusoidal estimation relies on spectral frames, we need to move the window forward. It can be any size, but low overlap will create audible artefacts. The -1 default value will default to half of windowSize (overlap of 2). + +:control fftSize: + + The inner FFT/IFFT size. It should be at least 4 samples long, at least the size of the window, and a power of 2. Making it larger allows an oversampling of the spectral precision. The -1 default value will default to windowSize. The -1 default value will default to the highest of windowSize and (bandwidth - 1) * 2. + +:control maxFFTSize: + + How large can the FFT be, by allocating memory at instantiation time. This cannot be modulated. + +:control maxNumPeaks: + + How large can the FFT be, by allocating memory at instantiation time. This cannot be modulated. 
diff --git a/example-code/sc/BufSineFeature.scd b/example-code/sc/BufSineFeature.scd new file mode 100644 index 00000000..228ad667 --- /dev/null +++ b/example-code/sc/BufSineFeature.scd @@ -0,0 +1,79 @@ +code:: + +( +~src = Buffer.readChannel(s,FluidFilesPath("Tremblay-BeatRemember.wav"),channels:[0]); +~sines = Buffer(s); +~residual = Buffer(s); +) + +// listen to the original if you want +~src.play; + +FluidBufSines.processBlocking(s,~src,sines:~sines,residual:~residual,action:{"done".postln;}); + +~sines.play; +~residual.play; + +// change some parameters + +// tracks can be short but the detection threshold is higher than the default +( +FluidBufSines.processBlocking(s, + ~src, + sines:~sines, + residual:~residual, + detectionThreshold:-40, + minTrackLen:1, + action:{"done".postln;} +); +) + +~sines.play; +~residual.play; + +// increase the minimum track length +( +FluidBufSines.processBlocking(s, + ~src, + sines:~sines, + residual:~residual, + detectionThreshold:-40, + minTrackLen:15, + action:{"done".postln;} +); +) + +~sines.play; +~residual.play; + +// lower the threshold but increase the track length drastically +( +FluidBufSines.processBlocking(s, + ~src, + sines:~sines, + residual:~residual, + detectionThreshold:-80, + minTrackLen:50, + action:{"done".postln;} +); +) + +~sines.play; +~residual.play; + +// increase the threshold drastically but lower the minimum track length +( +FluidBufSines.processBlocking(s, + ~src, + sines:~sines, + residual:~residual, + detectionThreshold:-24, + minTrackLen:1, + action:{"done".postln;} +); +) + +~sines.play; +~residual.play; + +:: \ No newline at end of file From 932e01258c38c271270ff93a36f9bb306fc1fe8c Mon Sep 17 00:00:00 2001 From: tremblap Date: Sat, 22 Oct 2022 15:02:55 +0100 Subject: [PATCH 3/8] revised help file and working code example --- doc/BufSineFeature.rst | 81 +++++++++++------------ doc/SineFeature.rst | 54 +++------------- example-code/sc/BufSineFeature.scd | 100 +++++++++++------------------ 
example-code/sc/SineFeature.scd | 66 ++++++------------- 4 files changed, 105 insertions(+), 196 deletions(-) diff --git a/doc/BufSineFeature.rst b/doc/BufSineFeature.rst index 47ce0800..57d6b6e7 100644 --- a/doc/BufSineFeature.rst +++ b/doc/BufSineFeature.rst @@ -1,82 +1,73 @@ -:digest: Sinusoidal Modelling and Resynthesis -:species: transformer -:sc-categories: Libraries>FluidDecomposition +:digest: Buffer-Based Sinusoidal Peak Tracking +:species: buffer-proc +:sc-categories: Libraries>FluidDecomposition, UGens>Buffer :sc-related: Guides/FluidCorpusManipulation, Classes/SinOsc -:see-also: Sines -:description: Sinusoidal Modelling process on its audio input. +:see-also: SineFeature, BufSines +:description: Interpolated Sinusoidal Peak Tracking on the Spectrum of audio stored in a buffer. :discussion: - It implements a mix of algorithms taken from classic papers. + This process is tracking peaks in the spectrum of audio stored in a buffer, then estimating an interpolated frequency and amplitude of that peak in relation to its spectral context. It is the first part of the process used by :fluid-obj:`BufSines`. + + The process will return two buffers containing time series that describe the interpolated frequencies and magnitudes changing over time in the source buffer. - The algorithm will take an audio in, and will divide it in two parts: - * a reconstruction of what it detects as sinusoidal; - * a residual derived from the previous signal to allow null-summing +:process: This is the method that calls for the peak tracking to be calculated on a given source buffer. +:output: Nothing, as the various destination buffers are declared in the function call. - The whole process is based on the assumption that signal is made of pitched steady components that have a long-enough duration and are periodic enough to be perceived as such, that can be tracked, resynthesised and removed from the original, leaving behind what is considered as non-pitched, noisy, and/or transient. 
It first tracks the peaks, then checks if they are the continuation of a peak in previous spectral frames, by assigning them a track. +:control source: -:process: The audio rate version of the object. -:output: An array of two audio streams: [0] is the harmonic part extracted, [1] is the rest. The latency between the input and the output is (( hopSize * minTrackLen) + windowSize) samples. + The |buffer| to use as the source material. The channels of multichannel buffers will be processed sequentially. +:control startFrame: -:control in: + The starting point for analysis in the source (in samples). - The input to be processed +:control numFrames: -:control bandwidth: + The duration (in samples) to analyse. - The number of bins used to resynthesise a peak. It has an effect on CPU cost: the widest is more accurate but more computationally expensive. It is capped at (fftSize / 2) + 1. +:control startChan: -:control numPeaks: - - The number of bins used to resynthesise a peak. It has an effect on CPU cost: the widest is more accurate but more computationally expensive. It is capped at (fftSize / 2) + 1. - -:control detectionThreshold: - - The threshold in dB above which a magnitude peak is considered to be a sinusoidal component. - -:control birthLowThreshold: + For multichannel sources, the starting channel to analyse. - The threshold in dB above which to consider a peak to start a sinusoidal component tracking, for the low end of the spectrum. It is interpolated across the spectrum until birthHighThreshold at half-Nyquist. +:control numChans: -:control birthHighThreshold: + For multichannel sources, the number of channels to analyse. - The threshold in dB above which to consider a peak to start a sinusoidal component tracking, for the high end of the spectrum. It is interpolated across the spectrum until birthLowThreshold at DC. +:control frequency: -:control minTrackLen: + The buffer where the interpolated frequency of the peaks will be written. 
+ - The minimum duration, in spectral frames, for a sinusoidal track to be accepted as a partial. It allows to remove bubbly pitchy artefacts, but is more CPU intensive and might reject quick pitch material. +:control magnitude: -:control trackingMethod: - - The algorithm used to track the sinusoidal continuity between spectral frames. 0 is the default, "Greedy", and 1 is a more expensive [^"Hungarian"]( Neri, J., and Depalle, P., "Fast Partial Tracking of Audio with Real-Time Capability through Linear Programming". Proceedings of DAFx-2018. ) one. - -:control trackMagRange: + The buffer where the interpolated magnitude of the peaks will be written. + +:control numPeaks: - The amplitude difference allowed for a track to diverge between frames, in dB. + The number of peaks to search for and report back. It is capped at (fftSize / 2) + 1. -:control trackFreqRange: +:control detectionThreshold: - The frequency difference allowed for a track to diverge between frames, in Hertz. + The threshold in dB above which a magnitude peak is considered to be a sinusoidal component. -:control trackProb: +:control sortBy: - The probability of the tracking algorithm to find a track. + How the reported peaks are to be ordered. By default (0), it is by frequencies (lowest first), and the alternative (1) is by magnitude (loudest first). :control windowSize: - The window size. As sinusoidal estimation relies on spectral frames, we need to decide what precision we give it spectrally and temporally. For more information visit https://learn.flucoma.org/learn/fourier-transform/ + The window size. As sinusoidal estimation relies on spectral frames, we need to decide what precision we give it spectrally and temporally. For more information visit https://learn.flucoma.org/learn/fourier-transform/ :control hopSize: - The window hop size. As sinusoidal estimation relies on spectral frames, we need to move the window forward. It can be any size, but low overlap will create audible artefacts. 
The -1 default value will default to half of windowSize (overlap of 2). + The window hop size. As sinusoidal estimation relies on spectral frames, we need to move the window forward. It can be any size, but low overlap will create audible artefacts. The -1 default value will default to half of windowSize (overlap of 2). :control fftSize: - The inner FFT/IFFT size. It should be at least 4 samples long, at least the size of the window, and a power of 2. Making it larger allows an oversampling of the spectral precision. The -1 default value will default to windowSize. The -1 default value will default to the highest of windowSize and (bandwidth - 1) * 2. + The inner FFT/IFFT size. It should be at least 4 samples long, at least the size of the window, and a power of 2. Making it larger allows an oversampling of the spectral precision. The -1 default value will default to windowSize. :control maxFFTSize: - How large can the FFT be, by allocating memory at instantiation time. This cannot be modulated. + How large can the FFT be, by allocating memory at instantiation time. This cannot be modulated. :control maxNumPeaks: - How large can the FFT be, by allocating memory at instantiation time. This cannot be modulated. + Up to how many peaks can be reported, by allocating memory at instantiation time. This cannot be modulated. diff --git a/doc/SineFeature.rst b/doc/SineFeature.rst index 47ce0800..13139731 100644 --- a/doc/SineFeature.rst +++ b/doc/SineFeature.rst @@ -1,65 +1,31 @@ -:digest: Sinusoidal Modelling and Resynthesis -:species: transformer +:digest: Sinusoidal Peak Tracking +:species: descriptor :sc-categories: Libraries>FluidDecomposition :sc-related: Guides/FluidCorpusManipulation, Classes/SinOsc -:see-also: Sines -:description: Sinusoidal Modelling process on its audio input. 
+:see-also: BufSineFeature, Sines +:description: Interpolated Sinusoidal Peak Tracking on the Spectrum. :discussion: - It implements a mix of algorithms taken from classic papers. - - The algorithm will take an audio in, and will divide it in two parts: - * a reconstruction of what it detects as sinusoidal; - * a residual derived from the previous signal to allow null-summing - - The whole process is based on the assumption that signal is made of pitched steady components that have a long-enough duration and are periodic enough to be perceived as such, that can be tracked, resynthesised and removed from the original, leaving behind what is considered as non-pitched, noisy, and/or transient. It first tracks the peaks, then checks if they are the continuation of a peak in previous spectral frames, by assigning them a track. + This process is tracking peaks in the spectrum, then estimating an interpolated frequency and amplitude of that peak in relation to its spectral context. It is the first part of the process used by :fluid-obj:`Sines`. :process: The audio rate version of the object. -:output: An array of two audio streams: [0] is the harmonic part extracted, [1] is the rest. The latency between the input and the output is (( hopSize * minTrackLen) + windowSize) samples. +:output: An array of two control streams: [0] is the interpolated frequency of the peaks extracted in Hz, [1] is their respective magnitudes in dB. The latency between the input and the output is windowSize samples. :control in: The input to be processed -:control bandwidth: - - The number of bins used to resynthesise a peak. It has an effect on CPU cost: the widest is more accurate but more computationally expensive. It is capped at (fftSize / 2) + 1. - :control numPeaks: - The number of bins used to resynthesise a peak. It has an effect on CPU cost: the widest is more accurate but more computationally expensive. It is capped at (fftSize / 2) + 1. + The number of peaks to search for and report back. 
It is capped at (fftSize / 2) + 1. :control detectionThreshold: The threshold in dB above which a magnitude peak is considered to be a sinusoidal component. -:control birthLowThreshold: - - The threshold in dB above which to consider a peak to start a sinusoidal component tracking, for the low end of the spectrum. It is interpolated across the spectrum until birthHighThreshold at half-Nyquist. - -:control birthHighThreshold: - - The threshold in dB above which to consider a peak to start a sinusoidal component tracking, for the high end of the spectrum. It is interpolated across the spectrum until birthLowThreshold at DC. - -:control minTrackLen: - - The minimum duration, in spectral frames, for a sinusoidal track to be accepted as a partial. It allows to remove bubbly pitchy artefacts, but is more CPU intensive and might reject quick pitch material. - -:control trackingMethod: - - The algorithm used to track the sinusoidal continuity between spectral frames. 0 is the default, "Greedy", and 1 is a more expensive [^"Hungarian"]( Neri, J., and Depalle, P., "Fast Partial Tracking of Audio with Real-Time Capability through Linear Programming". Proceedings of DAFx-2018. ) one. - -:control trackMagRange: - - The amplitude difference allowed for a track to diverge between frames, in dB. - -:control trackFreqRange: - - The frequency difference allowed for a track to diverge between frames, in Hertz. - -:control trackProb: +:control sortBy: - The probability of the tracking algorithm to find a track. + How the reported peaks are to be ordered. By default (0), it is by frequencies (lowest first), and the alternative (1) is by magnitude (loudest first). :control windowSize: @@ -79,4 +45,4 @@ :control maxNumPeaks: - How large can the FFT be, by allocating memory at instantiation time. This cannot be modulated. + Up to how many peaks can be reported, by allocating memory at instantiation time. This cannot be modulated. 
diff --git a/example-code/sc/BufSineFeature.scd b/example-code/sc/BufSineFeature.scd index 228ad667..6640fb05 100644 --- a/example-code/sc/BufSineFeature.scd +++ b/example-code/sc/BufSineFeature.scd @@ -1,79 +1,55 @@ +strong::Plot the BufSineFeature curves one over the other:: code:: +~oboe = Buffer.read(s,FluidFilesPath("Harker-DS-TenOboeMultiphonics-M.wav"),numFrames: 311000); ( -~src = Buffer.readChannel(s,FluidFilesPath("Tremblay-BeatRemember.wav"),channels:[0]); -~sines = Buffer(s); -~residual = Buffer(s); +~freq = Buffer(s); +~mags = Buffer(s); +FluidBufSineFeature.processBlocking(s,~oboe, frequency: ~freq, magnitude: ~mags, numPeaks: 5); +w = Window("FluidWaveform Test",Rect(0,0,1000,500)); +w.view.layout = VLayout( + FluidWaveform(~oboe,featuresBuffer: ~freq,standalone: false), + FluidWaveform(~oboe,featuresBuffer: ~mags,standalone: false)); +w.front; ) -// listen to the original if you want -~src.play; +strong::A few didactic examples:: +code:: -FluidBufSines.processBlocking(s,~src,sines:~sines,residual:~residual,action:{"done".postln;}); +//mono source of 3 sines +b = {SinOsc.ar([440,789,535],mul: [0.01,0.03,0.02]).sum}.asBuffer(1) -~sines.play; -~residual.play; +//listen +b.play -// change some parameters +//make destination buffers +~freq = Buffer(s); ~mags = Buffer(s); -// tracks can be short but the detection threshold is higher than the default -( -FluidBufSines.processBlocking(s, - ~src, - sines:~sines, - residual:~residual, - detectionThreshold:-40, - minTrackLen:1, - action:{"done".postln;} -); -) +//process +FluidBufSineFeature.process(s,b,frequency: ~freq, magnitude: ~mags,numPeaks: 4, action: {\done.postln}) -~sines.play; -~residual.play; +// retrieve the first 2 frames of 4 peaks +~freq.getn(0, 8, {|x|x.postln}) +~mags.getn(0, 8, {|x|x.postln}) -// increase the minimum track length -( -FluidBufSines.processBlocking(s, - ~src, - sines:~sines, - residual:~residual, - detectionThreshold:-40, - minTrackLen:15, - action:{"done".postln;} -); -) +// 
there are only 2 peaks... this is because the distance between 2 peaks has to be clearly segregated in the FFT world. At the default 1024 and the usual SC SR of 44100, this is 43Hz per bin, so 440 and 535 are too near each other... if we reprocess with a higher frame size, we get the right values -~sines.play; -~residual.play; +FluidBufSineFeature.process(s,b,frequency: ~freq, magnitude: ~mags,numPeaks: 4, windowSize: 2048, action: {\done.postln}) -// lower the threshold but increase the track length drastically -( -FluidBufSines.processBlocking(s, - ~src, - sines:~sines, - residual:~residual, - detectionThreshold:-80, - minTrackLen:50, - action:{"done".postln;} -); -) +// first 2 frames of 4 peaks +~freq.getn(0, 8, {|x|x.postln}) +~mags.getn(0, 8, {|x|x.postln}) -~sines.play; -~residual.play; +//here is another source for fun, stereo this time, and triangle waves +b = {LFTri.ar([300, 500],mul: [-40, -45].dbamp)}.asBuffer(1) -// increase the threshold drastically but lower the minimum track length -( -FluidBufSines.processBlocking(s, - ~src, - sines:~sines, - residual:~residual, - detectionThreshold:-24, - minTrackLen:1, - action:{"done".postln;} -); -) +b.play +b.query + +// asking for 2 peaks - first and third harmonic of each should pop out +FluidBufSineFeature.process(s, b, frequency: ~freq, magnitude: ~mags, numPeaks: 2, action: {\done.postln}) -~sines.play; -~residual.play; +// retrieving - the stereo values are interleaved, 2 for left 2 for right. 
+~freq.getn(0, 8, {|x|x.postln}) +~mags.getn(0, 8, {|x|x.postln}) -:: \ No newline at end of file diff --git a/example-code/sc/SineFeature.scd b/example-code/sc/SineFeature.scd index 43735fac..1aacde29 100644 --- a/example-code/sc/SineFeature.scd +++ b/example-code/sc/SineFeature.scd @@ -1,55 +1,31 @@ CODE:: -~src = Buffer.readChannel(s,FluidFilesPath("Tremblay-BeatRemember.wav"),channels:[0]); +// a didactic example: a cluster of sinusoids, sorted by magnitudes +{var source = SinOsc.ar([440,789],mul: [0.05,0.1]).sum; FluidSineFeature.kr(source,numPeaks: 3, sortBy: 1).poll; source.dup}.play +// a more exciting example: resynthesizing audio input ( -~synth = { - arg which = 0, detectionThreshold = -96, minTrackLen = 15; - var src = PlayBuf.ar(1,~src,BufRateScale.ir(~src),loop:1); - var sines, residual; - # sines, residual = FluidSines.ar(src,detectionThreshold:detectionThreshold,minTrackLen:minTrackLen); - Select.ar(which,[sines,residual]).dup; -}.play; -) - -~synth.set(\which,1) // residual -~synth.set(\which,0) // back to sinusoids - -// try some different parameters - -// tracks can be short but the detection threshold is higher than the default -~synth.set(\detectionThreshold,-40,\minTrackLen,1) - -// increase the minimum track length -~synth.set(\detectionThreshold,-40,\minTrackLen,15) - -// lower the threshold but increase the track length drastically -~synth.set(\detectionThreshold,-80,\minTrackLen,50) - -// increase the threshold drastically but lower the minimum track length -~synth.set(\detectionThreshold,-24,\minTrackLen,1) -:: -strong::a little more explanation:: -With these settings everything in the sound is considered a sinusoid, even short and quiet peaks. - -Because the decomposition is a windowed process, the detected sinusoidal peaks are located in time based on the window of analysis. When the oscillator changes (even slowly) over time we hear the artefact in the residual output. 
-code:: - -( -~synth = { - arg which = 0; - var stable = SinOsc.ar(69.midicps,0,0.1); - var oscillating = SinOsc.ar(SinOsc.kr(0.1,0,12,57).midicps,0,0.1); - var sig = SelectX.ar(which.lag(0.1),[stable,oscillating]); - var sines, residual; - # sines, residual = FluidSines.ar(sig,76,-144,-144,-144,1,0,200,1000,0); - [sines, residual * ((which*40) + 1).lag(0.1)] +var buf = Buffer.read(s,FluidFilesPath("Harker-DS-TenOboeMultiphonics-M.wav")); + +x = { + arg nbPeaks = 10, t_hold = 1; + var source = PlayBuf.ar(1, buf, loop: 1); + var analysis = Latch.kr(FluidSineFeature.kr(source,numPeaks: nbPeaks, maxNumPeaks: 50),t_hold); + var resynth = SinOsc.ar(analysis[Array.iota(50)], mul: analysis[Array.iota(50) + 50].dbamp).sum; + [source, resynth]; }.play ) -~synth.set(\which,1); +// play with the number of peaks to track +x.set(\nbPeaks, 5) +x.set(\nbPeaks, 1) +x.set(\nbPeaks, 50) -~synth.set(\which,0); +// trigger the holder to hear the reconstruction on the right hand side +x.set(\t_hold, 1) -:: \ No newline at end of file +// or make it automatic +r = Routine{x.set(\t_hold, 1);0.01.wait;}.loop.play +r.stop +:: From ec3f56422ba0d54493cf6f70d23dbbf01fd23cab Mon Sep 17 00:00:00 2001 From: tremblap Date: Mon, 24 Oct 2022 19:32:46 +0100 Subject: [PATCH 4/8] added MIDI and linMag --- doc/BufSineFeature.rst | 8 ++++++++ doc/SineFeature.rst | 12 ++++++++++-- example-code/sc/BufSineFeature.scd | 9 ++++++--- example-code/sc/SineFeature.scd | 5 ++++- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/doc/BufSineFeature.rst b/doc/BufSineFeature.rst index 57d6b6e7..04a94dba 100644 --- a/doc/BufSineFeature.rst +++ b/doc/BufSineFeature.rst @@ -52,6 +52,14 @@ How the reported peaks are to be ordered. By default (0), it is by frequencies (lowest first), and the alternative (1) is by magnitude (loudest first). +:control freqUnit: + + The units and scale used to report the frequency of the peaks. 
By default (0), it is in Hz (linear), and the alternative (1) is in MIDI (logarithmic). + +:control magUnit: + + The units and scale used to report the magnitude of the peaks. By default (0), it is in amp (linear), and the alternative (1) is in dB (logarithmic). + :control windowSize: The window size. As sinusoidal estimation relies on spectral frames, we need to decide what precision we give it spectrally and temporally. For more information visit https://learn.flucoma.org/learn/fourier-transform/ diff --git a/doc/SineFeature.rst b/doc/SineFeature.rst index 13139731..2592f71a 100644 --- a/doc/SineFeature.rst +++ b/doc/SineFeature.rst @@ -8,7 +8,7 @@ This process is tracking peaks in the spectrum, then estimating an interpolated frequency and amplitude of that peak in relation to its spectral context. It is the first part of the process used by :fluid-obj:`Sines`. :process: The audio rate version of the object. -:output: An array of two control streams: [0] is the interpolated frequency of the peaks extracted in Hz, [1] is their respective magnitudes in dB. The latency between the input and the output is windowSize samples. +:output: An array of two control streams: [0] is the interpolated frequency of the peaks extracted, in Hz or MIDI depending on freqUnit, [1] is their respective magnitudes, in amp or dB depending on magUnit. The latency between the input and the output is windowSize samples. :control in: @@ -25,7 +25,15 @@ :control sortBy: - How the reported peaks are to be ordered. By default (0), it is by frequencies (lowest first), and the alternative (1) is by magnitude (loudest first). + How the reported peaks are to be ordered. By default (0), it is by frequencies (lowest first), and the alternative (1) is by magnitude (loudest first). + +:control freqUnit: + + The units and scale used to report the frequency of the peaks. By default (0), it is in Hz (linear), and the alternative (1) is in MIDI (logarithmic). + +:control magUnit: + + The units and scale used to report the magnitude of the peaks.
By default (0), it is in amp (linear), and the alternative (1) is in dB (logarithmic). :control windowSize: diff --git a/example-code/sc/BufSineFeature.scd b/example-code/sc/BufSineFeature.scd index 6640fb05..ab95ad2a 100644 --- a/example-code/sc/BufSineFeature.scd +++ b/example-code/sc/BufSineFeature.scd @@ -26,7 +26,7 @@ b.play ~freq = Buffer(s); ~mags = Buffer(s); //process -FluidBufSineFeature.process(s,b,frequency: ~freq, magnitude: ~mags,numPeaks: 4, action: {\done.postln}) +FluidBufSineFeature.process(s,b,frequency: ~freq, magnitude: ~mags, numPeaks: 4, action: {\done.postln}) // retrieve the first 2 frames of 4 peaks ~freq.getn(0, 8, {|x|x.postln}) @@ -34,7 +34,7 @@ FluidBufSineFeature.process(s,b,frequency: ~freq, magnitude: ~mags,numPeaks: 4, // there are only 2 peaks... this is because the distance between 2 peaks has to be clearly segregated in the FFT world. At the default 1024 and the usual SC SR of 44100, this is 43Hz per bin, so 440 and 535 are too near each other... if we reprocess with a higher frame size, we get the right values -FluidBufSineFeature.process(s,b,frequency: ~freq, magnitude: ~mags,numPeaks: 4, windowSize: 2048, action: {\done.postln}) +FluidBufSineFeature.process(s,b,frequency: ~freq, magnitude: ~mags, numPeaks: 4, windowSize: 2048, action: {\done.postln}) // first 2 frames of 4 peaks ~freq.getn(0, 8, {|x|x.postln}) @@ -47,9 +47,12 @@ b.play b.query // asking for 2 peaks - first and third harmonic of each should pop out -FluidBufSineFeature.process(s, b, frequency: ~freq, magnitude: ~mags, numPeaks: 2, action: {\done.postln}) +FluidBufSineFeature.process(s, b, frequency: ~freq, magnitude: ~mags, numPeaks: 2, magUnit: 1, action: {\done.postln}) // retrieving - the stereo values are interleaved, 2 for left 2 for right. 
~freq.getn(0, 8, {|x|x.postln}) ~mags.getn(0, 8, {|x|x.postln}) +:: + + diff --git a/example-code/sc/SineFeature.scd b/example-code/sc/SineFeature.scd index 1aacde29..2abea808 100644 --- a/example-code/sc/SineFeature.scd +++ b/example-code/sc/SineFeature.scd @@ -4,6 +4,9 @@ CODE:: // a didactic example: a cluster of sinusoids, sorted by magnitudes {var source = SinOsc.ar([440,789],mul: [0.05,0.1]).sum; FluidSineFeature.kr(source,numPeaks: 3, sortBy: 1).poll; source.dup}.play +// or in MIDI and dB +{var source = SinOsc.ar([69,79].midicps,mul: [-40,-35].dbamp).sum; FluidSineFeature.kr(source,numPeaks: 3, sortBy: 1, freqUnit: 1, magUnit: 1).poll; source.dup}.play + // a more exciting example: resynthesizing audio input ( var buf = Buffer.read(s,FluidFilesPath("Harker-DS-TenOboeMultiphonics-M.wav")); @@ -12,7 +15,7 @@ x = { arg nbPeaks = 10, t_hold = 1; var source = PlayBuf.ar(1, buf, loop: 1); var analysis = Latch.kr(FluidSineFeature.kr(source,numPeaks: nbPeaks, maxNumPeaks: 50),t_hold); - var resynth = SinOsc.ar(analysis[Array.iota(50)], mul: analysis[Array.iota(50) + 50].dbamp).sum; + var resynth = SinOsc.ar(analysis[Array.iota(50)], mul: analysis[Array.iota(50) + 50]).sum; [source, resynth]; }.play ) From 4d1ada258e86d18b2bee199ddfe908b317d13a75 Mon Sep 17 00:00:00 2001 From: tremblap Date: Mon, 24 Oct 2022 19:54:04 +0100 Subject: [PATCH 5/8] add padding help --- doc/BufSineFeature.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/doc/BufSineFeature.rst b/doc/BufSineFeature.rst index 04a94dba..897f0781 100644 --- a/doc/BufSineFeature.rst +++ b/doc/BufSineFeature.rst @@ -72,6 +72,21 @@ The inner FFT/IFFT size. It should be at least 4 samples long, at least the size of the window, and a power of 2. Making it larger allows an oversampling of the spectral precision. The -1 default value will default to windowSize. The -1 default value will default to the highest of windowSize and (bandwidth - 1) * 2. 
+:control padding: + + Controls the zero-padding added to either end of the source buffer or segment. Padding ensures all values are analysed. Possible values are: + + :enum: + + :0: + No padding - The first analysis window starts at time 0, and the samples at either end will be tapered by the STFT windowing function. + + :1: + Half the window size - The first sample is centred in the analysis window ensuring that the start and end of the segment are accounted for in the analysis. + + :2: + Window size minus the hop size - Mode 2 can be useful when the overlap factor (window size / hop size) is greater than 2, to ensure that the input samples at either end of the segment are covered by the same number of analysis frames as the rest of the analysed material. + :control maxFFTSize: How large can the FFT be, by allocating memory at instantiation time. This cannot be modulated. From 5b9c31fab54cfc533ef47e52937f9bed3bc97900 Mon Sep 17 00:00:00 2001 From: tremblap Date: Fri, 11 Nov 2022 16:39:12 -0500 Subject: [PATCH 6/8] change of interface (sortBy to order) --- doc/BufSineFeature.rst | 2 +- doc/SineFeature.rst | 2 +- example-code/sc/SineFeature.scd | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/BufSineFeature.rst b/doc/BufSineFeature.rst index 897f0781..c604f925 100644 --- a/doc/BufSineFeature.rst +++ b/doc/BufSineFeature.rst @@ -48,7 +48,7 @@ The threshold in dB above which a magnitude peak is considered to be a sinusoidal component. -:control sortBy: +:control order: How the reported peaks are to be ordered. By default (0), it is by frequencies (lowest first), and the alternative (1) is by magnitude (loudest first). diff --git a/doc/SineFeature.rst b/doc/SineFeature.rst index 2592f71a..2537bce6 100644 --- a/doc/SineFeature.rst +++ b/doc/SineFeature.rst @@ -23,7 +23,7 @@ The threshold in dB above which a magnitude peak is considered to be a sinusoidal component. -:control sortBy: +:control order: How the reported peaks are to be ordered. 
By default (0), it is by frequencies (lowest first), and the alternative (1) is by magnitude (loudest first). diff --git a/example-code/sc/SineFeature.scd b/example-code/sc/SineFeature.scd index 2abea808..fb93f711 100644 --- a/example-code/sc/SineFeature.scd +++ b/example-code/sc/SineFeature.scd @@ -2,10 +2,10 @@ CODE:: // a didactic example: a cluster of sinusoids, sorted by magnitudes -{var source = SinOsc.ar([440,789],mul: [0.05,0.1]).sum; FluidSineFeature.kr(source,numPeaks: 3, sortBy: 1).poll; source.dup}.play +{var source = SinOsc.ar([440,789],mul: [0.05,0.1]).sum; FluidSineFeature.kr(source,numPeaks: 3, order: 1).poll; source.dup}.play // or in MIDI and dB -{var source = SinOsc.ar([69,79].midicps,mul: [-40,-35].dbamp).sum; FluidSineFeature.kr(source,numPeaks: 3, sortBy: 1, freqUnit: 1, magUnit: 1).poll; source.dup}.play +{var source = SinOsc.ar([69,79].midicps,mul: [-40,-35].dbamp).sum; FluidSineFeature.kr(source,numPeaks: 3, order: 1, freqUnit: 1, magUnit: 1).poll; source.dup}.play // a more exciting example: resynthesizing audio input ( From fed0a3952e4c568f331157624dc17c4ca247177e Mon Sep 17 00:00:00 2001 From: tremblap Date: Thu, 8 Dec 2022 11:50:41 +0000 Subject: [PATCH 7/8] typo in example code --- example-code/sc/BufSineFeature.scd | 1 + 1 file changed, 1 insertion(+) diff --git a/example-code/sc/BufSineFeature.scd b/example-code/sc/BufSineFeature.scd index ab95ad2a..8968a27b 100644 --- a/example-code/sc/BufSineFeature.scd +++ b/example-code/sc/BufSineFeature.scd @@ -12,6 +12,7 @@ w.view.layout = VLayout( FluidWaveform(~oboe,featuresBuffer: ~mags,standalone: false)); w.front; ) +:: strong::A few didactic examples:: code:: From 66a08f57da600f5369fbf87bb86d822a7c486319 Mon Sep 17 00:00:00 2001 From: tremblap Date: Thu, 8 Dec 2022 12:28:31 +0000 Subject: [PATCH 8/8] typo in header --- doc/BufSineFeature.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/BufSineFeature.rst b/doc/BufSineFeature.rst index c604f925..a8df2981 100644 --- 
a/doc/BufSineFeature.rst +++ b/doc/BufSineFeature.rst @@ -3,7 +3,7 @@ :sc-categories: Libraries>FluidDecomposition, UGens>Buffer :sc-related: Guides/FluidCorpusManipulation, Classes/SinOsc :see-also: SineFeature, BufSines -:description: Interpolated Sinusoidal Peak Tracking on the Spectrum of audio stored in a buffer. +:description: Interpolated Sinusoidal Peak Tracking on the Spectrum of Audio Stored in a Buffer. :discussion: This process is tracking peaks in the spectrum of audio stored in a buffer, then estimating an interpolated frequency and amplitude of that peak in relation to its spectral context. It is the first part of the process used by :fluid-obj:`BufSines`.