# Examples - [Mel & MFCC](#mel--mfcc) - [CWT & Synchrosqueezing](#cwt--synchrosqueezing) - [CQT & Chroma](#cqt--chroma) - [Different Wavelet Type](#different-wavelet-type) - [Spectral Features](#spectral-features) - [Pitch Estimate](#pitch-estimate) - [Onset Detection](#onset-detection) - [Harmonic Percussive Source Separation](#harmonic-percussive-source-separation) ## Mel & MFCC Mel spectrogram and Mel-frequency cepstral coefficients ```python import numpy as np import audioflux as af import matplotlib.pyplot as plt from audioflux.display import fill_spec # Get a 220Hz's audio file path sample_path = af.utils.sample_path('220') # Read audio data and sample rate audio_arr, sr = af.read(sample_path) # Extract mel spectrogram spec_arr, mel_fre_band_arr = af.mel_spectrogram(audio_arr, num=128, radix2_exp=12, samplate=sr) spec_arr = np.abs(spec_arr) # Extract mfcc mfcc_arr, _ = af.mfcc(audio_arr, cc_num=13, mel_num=128, radix2_exp=12, samplate=sr) # Display audio_len = audio_arr.shape[-1] # calculate x/y-coords x_coords = np.linspace(0, audio_len / sr, spec_arr.shape[-1] + 1) y_coords = np.insert(mel_fre_band_arr, 0, 0) fig, ax = plt.subplots() img = fill_spec(spec_arr, axes=ax, x_coords=x_coords, y_coords=y_coords, x_axis='time', y_axis='log', title='Mel Spectrogram') fig.colorbar(img, ax=ax) fig, ax = plt.subplots() img = fill_spec(mfcc_arr, axes=ax, x_coords=x_coords, x_axis='time', title='MFCC') fig.colorbar(img, ax=ax) plt.show() ``` ## CWT & Synchrosqueezing Continuous Wavelet Transform spectrogram and its corresponding synchrosqueezing reassignment spectrogram ```python import numpy as np import audioflux as af from audioflux.type import SpectralFilterBankScaleType, WaveletContinueType from audioflux.utils import note_to_hz import matplotlib.pyplot as plt from audioflux.display import fill_spec # Get a 220Hz's audio file path sample_path = af.utils.sample_path('220') # Read audio data and sample rate audio_arr, sr = af.read(sample_path) audio_arr = audio_arr[..., :4096] cwt_obj = af.CWT(num=84, radix2_exp=12, samplate=sr, low_fre=note_to_hz('C1'), bin_per_octave=12, wavelet_type=WaveletContinueType.MORSE, scale_type=SpectralFilterBankScaleType.OCTAVE) cwt_spec_arr = cwt_obj.cwt(audio_arr) synsq_obj = af.Synsq(num=cwt_obj.num, radix2_exp=cwt_obj.radix2_exp, samplate=cwt_obj.samplate) synsq_arr = synsq_obj.synsq(cwt_spec_arr, filter_bank_type=cwt_obj.scale_type, fre_arr=cwt_obj.get_fre_band_arr()) # Show CWT fig, ax = plt.subplots(figsize=(7, 4)) img = fill_spec(np.abs(cwt_spec_arr), axes=ax, x_coords=cwt_obj.x_coords(), y_coords=cwt_obj.y_coords(), x_axis='time', y_axis='log', title='CWT') fig.colorbar(img, ax=ax) # Show Synsq fig, ax = plt.subplots(figsize=(7, 4)) img = fill_spec(np.abs(synsq_arr), axes=ax, x_coords=cwt_obj.x_coords(), y_coords=cwt_obj.y_coords(), x_axis='time', y_axis='log', title='Synsq') fig.colorbar(img, ax=ax) plt.show() ``` ## CQT & Chroma Constant-Q Transform spectrogram and cqt_chroma ```python import numpy as np import audioflux as af import matplotlib.pyplot as plt from audioflux.display import fill_spec # Read audio data and sample rate audio_arr, sr = af.read(af.utils.sample_path('guitar_chord2')) # Create CQT object cqt_obj = af.CQT(num=84, samplate=sr) # Extract CQT and Chroma_cqt cqt_arr = cqt_obj.cqt(audio_arr) chroma_cqt_arr = cqt_obj.chroma(cqt_arr) # Display audio_len = audio_arr.shape[-1] # Display CQT fig, ax = plt.subplots() img = fill_spec(np.abs(cqt_arr), axes=ax, x_coords=cqt_obj.x_coords(audio_len), x_axis='time', y_coords=cqt_obj.y_coords(), y_axis='log', title='CQT') fig.colorbar(img, ax=ax) # Display Chroma_CQT fig, ax = plt.subplots() img = fill_spec(chroma_cqt_arr, axes=ax, x_coords=cqt_obj.x_coords(audio_len), x_axis='time', y_axis='chroma', title='Chroma-CQT') fig.colorbar(img, ax=ax) plt.show() ``` ## Different Wavelet Type Compare morlet, morese, paul and bump Wavelet type spectrogram ```python import numpy as np import audioflux as af from audioflux.type import SpectralFilterBankScaleType, WaveletContinueType from audioflux.utils import note_to_hz import matplotlib.pyplot as plt from audioflux.display import fill_spec, fill_wave # Read audio data and sample rate audio_arr, sr = af.read(af.utils.sample_path('220')) audio_arr = audio_arr[..., :4096] # Create CWT object of Octave and Extract spectrogram obj = af.CWT(num=84, radix2_exp=12, samplate=sr, low_fre=note_to_hz('C1'), bin_per_octave=12, wavelet_type=WaveletContinueType.MORLET, scale_type=SpectralFilterBankScaleType.OCTAVE) morlet_spec_arr = obj.cwt(audio_arr) morlet_spec_arr = np.abs(morlet_spec_arr) obj = af.CWT(num=84, radix2_exp=12, samplate=sr, low_fre=note_to_hz('C1'), bin_per_octave=12, wavelet_type=WaveletContinueType.MORSE, scale_type=SpectralFilterBankScaleType.OCTAVE) morse_spec_arr = obj.cwt(audio_arr) morse_spec_arr = np.abs(morse_spec_arr) obj = af.CWT(num=84, radix2_exp=12, samplate=sr, low_fre=note_to_hz('C1'), bin_per_octave=12, wavelet_type=WaveletContinueType.PAUL, scale_type=SpectralFilterBankScaleType.OCTAVE) paul_spec_arr = obj.cwt(audio_arr) paul_spec_arr = np.abs(paul_spec_arr) obj = af.CWT(num=84, radix2_exp=12, samplate=sr, low_fre=note_to_hz('C1'), bin_per_octave=12, wavelet_type=WaveletContinueType.BUMP, scale_type=SpectralFilterBankScaleType.OCTAVE) bump_spec_arr = obj.cwt(audio_arr) bump_spec_arr = np.abs(bump_spec_arr) # Display fig, ax = plt.subplots(nrows=5, figsize=(8, 10), sharex=True) fill_wave(audio_arr, samplate=sr, axes=ax[0]) fill_spec(morlet_spec_arr, axes=ax[1], x_coords=obj.x_coords(), y_coords=obj.y_coords(), y_axis='log', title='CWT-Octave MORLET Spectrogram') fill_spec(morse_spec_arr, axes=ax[2], x_coords=obj.x_coords(), y_coords=obj.y_coords(), y_axis='log', title='CWT-Octave MORSE Spectrogram') fill_spec(paul_spec_arr, axes=ax[3], x_coords=obj.x_coords(), y_coords=obj.y_coords(), y_axis='log', title='CWT-Octave PAUL Spectrogram') fill_spec(bump_spec_arr, axes=ax[4], x_coords=obj.x_coords(), y_coords=obj.y_coords(), x_axis='time', y_axis='log', title='CWT-Octave BUMP Spectrogram') plt.show() ``` ## Spectral Features Flatness, Novelty, Entropy, RMS and Slope features ```python import numpy as np import audioflux as af from audioflux.type import SpectralDataType, SpectralFilterBankScaleType import matplotlib.pyplot as plt from audioflux.display import fill_wave, fill_plot, fill_spec # Read audio data and sample rate audio_arr, sr = af.read(af.utils.sample_path('guitar_chord2')) audio_len = audio_arr.shape[-1] bft_obj = af.BFT(num=256, samplate=sr, radix2_exp=12, slide_length=1024, data_type=SpectralDataType.MAG, scale_type=SpectralFilterBankScaleType.LINEAR) spec_arr = bft_obj.bft(audio_arr) spec_arr = np.abs(spec_arr) # Create Spectral object and extract spectral feature spectral_obj = af.Spectral(num=bft_obj.num, fre_band_arr=bft_obj.get_fre_band_arr()) spectral_obj.set_time_length(spec_arr.shape[-1]) flatness_arr = spectral_obj.flatness(spec_arr) novelty_arr = spectral_obj.novelty(spec_arr) entropy_arr = spectral_obj.entropy(spec_arr) rms_arr = spectral_obj.rms(spec_arr) slope_arr = spectral_obj.slope(spec_arr) # Display fig, ax = plt.subplots(nrows=7, figsize=(8, 10), sharex=True) times = np.arange(0, flatness_arr.shape[-1]) * (bft_obj.slide_length / bft_obj.samplate) fill_wave(audio_arr, samplate=sr, axes=ax[0]) fill_spec(spec_arr, axes=ax[1], x_coords=bft_obj.x_coords(audio_len), y_coords=bft_obj.y_coords(), y_axis='log') fill_plot(times, flatness_arr, axes=ax[2], label='flatness') fill_plot(times, novelty_arr, axes=ax[3], label='novelty') fill_plot(times, entropy_arr, axes=ax[4], label='entropy') fill_plot(times, rms_arr, axes=ax[5], label='rms_arr') fill_plot(times, slope_arr, axes=ax[6], label='slope_arr') plt.show() ``` ## Pitch Estimate Fundamental frequency (F0) estimation using the YIN algorithm. ```python import numpy as np import audioflux as af from audioflux.type import PitchType import matplotlib.pyplot as plt from audioflux.display import fill_wave # Read audio data and sample rate audio_arr, sr = af.read(af.utils.sample_path('voice')) obj = af.Pitch(pitch_type=PitchType.YIN) fre_arr, value_arr1, value_arr2 = obj.pitch(audio_arr) fre_arr[fre_arr < 1] = np.nan # Display fig, ax = plt.subplots(nrows=2, figsize=(8, 6), sharex=True) times = np.arange(0, fre_arr.shape[-1]) * (obj.slide_length / obj.samplate) fill_wave(audio_arr, samplate=sr, axes=ax[0]) ax[1].xaxis.set_label_text("Time(s)") ax[1].yaxis.set_label_text("Frequency(Hz)") ax[1].plot(times, fre_arr, label='fre', linewidth=3) # set real plot real_fre_arr = np.zeros_like(fre_arr) real_fre_arr[25:48] = 261.6 real_fre_arr[56:78] = 293.7 real_fre_arr[87:107] = 329.6 real_fre_arr[118:135] = 349.2 real_fre_arr[150:169] = 392.0 real_fre_arr[179:200] = 440.0 real_fre_arr[212:243] = 493.9 real_fre_arr[real_fre_arr == 0] = np.nan ax[1].plot(times, real_fre_arr, color='red', label='fre', linewidth=2) plt.show() ``` ## Onset Detection Locate note onset events by picking peaks in an onset strength envelope. ```python import numpy as np import audioflux as af from audioflux.type import SpectralFilterBankScaleType, SpectralDataType, NoveltyType import matplotlib.pyplot as plt from audioflux.display import fill_wave, fill_plot, fill_spec # Read audio data and sample rate audio_arr, sr = af.read(af.utils.sample_path('guitar_chord2')) bft_obj = af.BFT(num=128, samplate=sr, radix2_exp=11, slide_length=2048, scale_type=SpectralFilterBankScaleType.MEL, data_type=SpectralDataType.POWER) spec_arr = bft_obj.bft(audio_arr) spec_dB_arr = af.utils.power_to_db(np.abs(spec_arr)) n_fre, n_time = spec_dB_arr.shape onset_obj = af.Onset(time_length=n_time, fre_length=n_fre, slide_length=bft_obj.slide_length, samplate=bft_obj.samplate, novelty_type=NoveltyType.FLUX) point_arr, evn_arr, time_arr, value_arr = onset_obj.onset(spec_dB_arr) audio_len = audio_arr.shape[-1] fig, axes = plt.subplots(nrows=3, sharex=True) img = fill_spec(spec_dB_arr, axes=axes[0], x_coords=bft_obj.x_coords(audio_len), y_coords=bft_obj.y_coords(), x_axis='time', y_axis='log', title='Onset') ax = fill_wave(audio_arr, samplate=sr, axes=axes[1]) ax.vlines(time_arr, -1, 1, color='r', alpha=0.9, linestyle='--', label='Onsets') times = np.arange(0, evn_arr.shape[-1]) * (bft_obj.slide_length / sr) ax = fill_plot(times, evn_arr, axes=axes[2], label='Onset strength') ax.vlines(time_arr, evn_arr.min(), evn_arr.max(), color='r', alpha=0.9, linestyle='--', label='Onsets') plt.show() ``` ### Harmonic Percussive Source Separation Decompose audio into harmonic and percussive components ```python import audioflux as af from audioflux.type import SpectralDataType, WindowType, SpectralFilterBankScaleType import matplotlib.pyplot as plt from audioflux.display import fill_wave, fill_spec # Read audio data and sample rate audio_arr, sr = af.read(af.utils.sample_path('chord_metronome2')) # Compute the hpss radix2_exp = 11 slide_length = (1 << radix2_exp) // 4 hpss_obj = af.HPSS(radix2_exp=radix2_exp, window_type=WindowType.HAMM, slide_length=slide_length, h_order=21, p_order=31) h_arr, p_arr = hpss_obj.hpss(audio_arr) # Extract Linear spectrogram bft_obj = af.BFT(num=2049, radix2_exp=12, samplate=sr, data_type=SpectralDataType.POWER, scale_type=SpectralFilterBankScaleType.LINEAR) audio_arr = audio_arr[..., :h_arr.shape[-1]] origin_spec_arr = bft_obj.bft(audio_arr, result_type=1) h_spec_arr = bft_obj.bft(h_arr, result_type=1) p_spec_arr = bft_obj.bft(p_arr, result_type=1) origin_spec_arr = af.utils.power_to_abs_db(origin_spec_arr) h_spec_arr = af.utils.power_to_abs_db(h_spec_arr) p_spec_arr = af.utils.power_to_abs_db(p_spec_arr) # Display fig, axes = plt.subplots(nrows=3, sharex=True, sharey=True) fill_wave(audio_arr, samplate=sr, axes=axes[0]) fill_wave(h_arr, samplate=sr, axes=axes[1]) fill_wave(p_arr, samplate=sr, axes=axes[2]) audio_len = audio_arr.shape[-1] fig, axes = plt.subplots(nrows=3, sharex=True, sharey=True) fill_spec(origin_spec_arr, axes=axes[0], x_coords=bft_obj.x_coords(audio_len), y_coords=bft_obj.y_coords(), y_axis='log', title='origin spec') fill_spec(h_spec_arr, axes=axes[1], x_coords=bft_obj.x_coords(audio_len), y_coords=bft_obj.y_coords(), y_axis='log', title='h spec') fill_spec(p_spec_arr, axes=axes[2], x_coords=bft_obj.x_coords(audio_len), y_coords=bft_obj.y_coords(), y_axis='log', title='p spec') plt.show() ```